/// <summary>
/// Registers a token while the configured "timeout" value is not a parsable
/// integer and expects the request to succeed anyway.
/// </summary>
public void TestTimeoutNotParsable()
{
    CommManager.reset();

    // Build the settings in a local first; the field is assigned once complete.
    var settings = new Dictionary<string, string>();
    settings.Add("url", Properties.Settings.Default.url);
    settings.Add("reportGroup", "Default Report Group");
    settings.Add("username", "DOTNET");
    settings.Add("version", "11.0");
    settings.Add("timeout", "notparsableasint");
    settings.Add("merchantId", "101");
    settings.Add("password", "TESTCASE");
    settings.Add("printxml", "true");
    settings.Add("proxyHost", Properties.Settings.Default.proxyHost);
    settings.Add("proxyPort", Properties.Settings.Default.proxyPort);
    settings.Add("logFile", Properties.Settings.Default.logFile);
    settings.Add("neuterAccountNums", "true");
    _config = settings;

    _cnp = new CnpOnline(_config);

    var request = new registerTokenRequestType
    {
        id = "1",
        reportGroup = "Planets",
        orderId = "12344",
        accountNumber = "1233456789103801"
    };

    var response = _cnp.RegisterToken(request);

    StringAssert.AreEqualIgnoringCase("Account number was successfully registered", response.message);
}
/// <summary>
/// Registers a token with a configured "timeout" of "0" and expects the
/// call to fail with a <see cref="WebException"/>.
/// </summary>
public void TestTimeoutReached()
{
    CommManager.reset();

    // Build the settings in a local first; the field is assigned once complete.
    var settings = new Dictionary<string, string>();
    settings.Add("url", Properties.Settings.Default.url);
    settings.Add("reportGroup", "Default Report Group");
    settings.Add("username", "DOTNET");
    settings.Add("version", "11.0");
    settings.Add("timeout", "0");
    settings.Add("merchantId", "101");
    settings.Add("password", "TESTCASE");
    settings.Add("printxml", "true");
    settings.Add("proxyHost", Properties.Settings.Default.proxyHost);
    settings.Add("proxyPort", Properties.Settings.Default.proxyPort);
    settings.Add("logFile", Properties.Settings.Default.logFile);
    settings.Add("neuterAccountNums", "true");
    _config = settings;

    _cnp = new CnpOnline(_config);

    var request = new registerTokenRequestType
    {
        id = "1",
        reportGroup = "Planets",
        orderId = "12344",
        accountNumber = "1233456789103801"
    };

    Assert.Throws<WebException>(() => { _cnp.RegisterToken(request); });
}
/// <summary>
/// Rebuild the cluster job list from the sub-folders of the configured jobs folder.
/// </summary>
/// <param name="virtualCluster">Virtual cluster to use (defined only for some cluster types).</param>
/// <param name="manager">Communication manager; supplies the cancellation token and receives progress.</param>
protected override void RecomputeClusterJobList(string virtualCluster, CommManager manager)
{
    this.clusterJobs = new Dictionary<string, ClusterJobInformation>();

    string jobsFolder = this.config.JobsFolder;
    if (Directory.Exists(jobsFolder))
    {
        string[] jobFolders = Directory.GetDirectories(this.config.JobsFolder);
        for (int index = 0; index < jobFolders.Length; index++)
        {
            manager.Token.ThrowIfCancellationRequested();

            string folder = jobFolders[index];
            string folderName = Path.GetFileName(folder);
            ClusterJobInformation jobInfo = this.GetJobInfo(folder, folderName);
            if (jobInfo != null)
            {
                // ReSharper disable once AssignNullToNotNullAttribute
                this.clusterJobs.Add(folderName, jobInfo);
            }

            // Progress is computed from the number of folders completed before this one.
            manager.Progress(index * 100 / jobFolders.Length);
        }

        manager.Progress(100);
    }
}
/// <summary>
/// Verifies that a multi-site configuration yields a CommManager instance that
/// reports multi-site enabled, an error threshold of 4 and two multi-site URLs.
/// </summary>
public void testInstanceMultiSite()
{
    Dictionary<string, string> _config = new Dictionary<string, string>
    {
        { "proxyHost", "websenseproxy" },
        { "proxyPort", "8080" },
        { "multiSite", "true" },
        { "printxml", "false" },
        { "printMultiSiteDebug", "true" },
        { "merchantId", "101" },
        { "username", "DOTNET" },
        { "password", "TESTCASE" },
        { "url", legacyUrl },
        { "multiSiteUrl1", site1Url },
        { "multiSiteUrl2", site2Url },
        { "multiSiteErrorThreshold", "4" },
        { "maxHoursWithoutSwitch", "48" }
    };

    CommManager.reset();
    CommManager cmg = CommManager.instance(_config);

    Assert.IsNotNull(cmg);
    Assert.IsTrue(cmg.getMultiSite());

    // Expected value goes first so that a failure message labels expected/actual correctly.
    Assert.AreEqual(4, cmg.getMultiSiteThreshold());
    Assert.AreEqual(2, cmg.getMultiSiteUrls().Count());
}
/// <summary>
/// Creates a message handler bound to the given communication manager, with
/// empty incoming and outgoing message lists and a new custom message handler.
/// </summary>
/// <param name="comms">Communication manager stored in <c>Comms</c>.</param>
public MessageHandler(CommManager comms)
{
    var incoming = new List<RawMessage>();
    var outgoing = new List<RawMessage>();

    Comms = comms;
    IncomingMessages = incoming;
    OutgoingMessages = outgoing;
    customMessageHandler = new CustomMessageHandler();
}
/// <summary>
/// Runs the CommManager business loop until a key is pressed on the console,
/// then cancels the loop and waits for it to finish.
/// </summary>
public static void RunApp()
{
    // Inject platform-specific logic
    try
    {
        using (var cancellation = new CancellationTokenSource())
        {
            using (var car = new CommManager())
            {
                Task businessLoop = car.BusinessLoop(cancellation.Token);
                // System.Diagnostics.Process.Start(Config.HTTPSERVER_CAR_URL);

                System.Console.WriteLine("Press any key to halt...");
                System.Console.ReadKey();

                // Request shutdown, then block until the loop has actually ended.
                cancellation.Cancel();
                businessLoop.Wait();
            }
        }
    }
    catch (Exception failure)
    {
        Debug.WriteLine("PROGRAM:\tEXCEPTION: " + failure.ToString());
    }
    finally
    {
        Debug.WriteLine("PROGRAM:\tRun finished");
    }
}
/// <summary>
/// Rebuild the clusterJobs field from the job summaries stored as XML files
/// in the "jobs" sub-folder of the cache directory.
/// </summary>
/// <param name="virtualCluster">Unused.</param>
/// <param name="manager">Communication manager; supplies the cancellation token and receives status and progress.</param>
protected override void RecomputeClusterJobList(string virtualCluster, CommManager manager)
{
    this.clusterJobs = new Dictionary<string, ClusterJobInformation>();

    if (string.IsNullOrEmpty(CachedClusterResidentObject.CacheDirectory))
    {
        return;
    }

    string jobsFolder = Path.Combine(CachedClusterResidentObject.CacheDirectory, "jobs");
    if (!Directory.Exists(jobsFolder))
    {
        Directory.CreateDirectory(jobsFolder);
    }

    foreach (string summaryFile in Directory.GetFiles(jobsFolder, "*.xml"))
    {
        manager.Token.ThrowIfCancellationRequested();

        DryadLinqJobSummary summary = Utilities.LoadXml<DryadLinqJobSummary>(summaryFile);

        // there may be two jobs with same id from different clusters
        string cacheKey = summary.Cluster + "-" + summary.ClusterJobId;

        ClusterJobInformation cached = new ClusterJobInformation(
            this.Config.Name,
            summary.Cluster,
            cacheKey,
            summary.Name,
            summary.User,
            summary.Date,
            summary.EndTime - summary.Date,
            summary.Status);
        cached.SetAssociatedSummary(summary);

        if (!this.clusterJobs.ContainsKey(cacheKey))
        {
            this.clusterJobs.Add(cacheKey, cached);
        }
        else
        {
            manager.Status("Duplicate job id, cannot insert in cache " + summary.AsIdentifyingString(), StatusKind.Error);
        }
    }

    manager.Progress(100);
}
/// <summary>
/// Verifies that findUrl() on a multi-site CommManager returns the URL at the
/// current multi-site index, and that this URL is one of the two configured sites.
/// </summary>
public void testFindUrl_MultiSite1()
{
    var settings = new Dictionary<string, string>();
    settings.Add("proxyHost", "websenseproxy");
    settings.Add("proxyPort", "8080");
    settings.Add("printxml", "false");
    settings.Add("merchantId", "101");
    settings.Add("username", "DOTNET");
    settings.Add("password", "TESTCASE");
    settings.Add("multiSite", "true");
    settings.Add("printMultiSiteDebug", "true");
    settings.Add("url", legacyUrl);
    settings.Add("multiSiteUrl1", site1Url);
    settings.Add("multiSiteUrl2", site2Url);
    settings.Add("multiSiteErrorThreshold", "4");
    settings.Add("maxHoursWithoutSwitch", "48");

    CommManager.reset();
    CommManager manager = CommManager.instance(settings);
    Assert.IsNotNull(manager);
    Assert.IsTrue(manager.getMultiSite());

    RequestTarget target = manager.findUrl();
    Assert.AreEqual(manager.getMultiSiteUrls()[manager.getCurrentMultiSiteUrlIndex()], target.getUrl());

    bool isKnownSite = target.getUrl().Equals(site1Url) || target.getUrl().Equals(site2Url);
    Assert.True(isKnownSite);
}
/// <summary>
/// Creates a performance-test worker: records the thread id, resets the
/// CommManager and builds a CnpOnline client with a multi-site configuration.
/// </summary>
/// <param name="idNumber">Identifier stored in <c>threadId</c>.</param>
public PerformanceTest(long idNumber)
{
    threadId = idNumber;
    try
    {
        CommManager.reset();

        // The configuration is only needed here, so it is created once, in place
        // (previously an empty dictionary was allocated and immediately discarded).
        var settings = new Dictionary<string, string>
        {
            { "proxyHost", "websenseproxy" },
            { "proxyPort", "8080" },
            { "multiSite", "true" },
            { "printxml", "false" },
            { "printMultiSiteDebug", "false" },
            { "merchantId", "101" },
            { "username", "DOTNET" },
            { "password", "TESTCASE" }
        };
        cnp = new CnpOnline(settings);
    }
    catch (Exception e)
    {
        // Construction is best-effort: the failure is written to the console and
        // cnp is left unassigned by this constructor.
        Console.WriteLine(e.ToString());
    }
}
/// <summary>
/// Runs <c>cycleCount</c> simulated requests: each cycle asks the CommManager for a
/// target URL, sleeps for a random 100-599 ms to simulate a transaction, and reports
/// a successful (HTTP 200) response. Prints a summary line when done.
/// </summary>
public void runPerformanceTest()
{
    Random rand = new Random();
    long startTime = DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond;
    long totalTransactionTime = 0;

    for (int n = 0; n < cycleCount; n++)
    {
        requestCount++;
        RequestTarget target = CommManager.instance(config).findUrl();
        try
        {
            int sleepTime = 100 + rand.Next(500);
            totalTransactionTime += sleepTime;
            Thread.Sleep(sleepTime);
        }
        catch (Exception e)
        {
            Console.WriteLine(e.ToString());
        }
        CommManager.instance(config).reportResult(target, CommManager.REQUEST_RESULT_RESPONSE_RECEIVED, 200);
    }

    long duration = DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond - startTime;

    // Guard the average: when no request was made (e.g. cycleCount is 0) the
    // division would otherwise fail with a divide-by-zero.
    Console.WriteLine("Thread " + threadId + " completed. Total Requests:" + requestCount
        + " Elapsed Time:" + (duration / 1000)
        + " secs Average Txn Time:" + (requestCount > 0 ? totalTransactionTime / requestCount : 0)
        + " ms");
}
/// <summary>
/// Resets the CommManager, loads the configuration from ConfigManager and overrides
/// the credentials and encryption settings with values from environment variables.
/// </summary>
/// <exception cref="InvalidOperationException">The "encPassword" environment variable is not set.</exception>
public void SetUp()
{
    CommManager.reset();
    ConfigManager configManager = new ConfigManager();
    _config = configManager.getConfig();

    // Keys not assigned below (url, reportGroup, printxml, timeout, proxyHost, proxyPort,
    // sftpUrl, knownHostsFile, requestDirectory, responseDirectory) are not overridden
    // here; whatever configManager.getConfig() returned for them is kept.
    _config["username"] = Environment.GetEnvironmentVariable("encUsername");
    _config["merchantId"] = Environment.GetEnvironmentVariable("encMerchantId");

    // The password is post-processed, so a missing variable must be detected
    // explicitly instead of surfacing as a NullReferenceException.
    string encPassword = Environment.GetEnvironmentVariable("encPassword");
    if (encPassword == null)
    {
        throw new InvalidOperationException("Environment variable 'encPassword' is not set.");
    }
    _config["password"] = encPassword.Replace("\"", "");

    _config["sftpUsername"] = Environment.GetEnvironmentVariable("encSftpUsername");
    _config["sftpPassword"] = Environment.GetEnvironmentVariable("encSftpPassword");
    _config["useEncryption"] = "true";
    _config["vantivPublicKeyId"] = Environment.GetEnvironmentVariable("vantivPublicKeyId");
    _config["pgpPassphrase"] = Environment.GetEnvironmentVariable("pgpPassphrase");
}
/// <summary>
/// Initialize a FailureDiagnosis from an already-collected job and its static plan.
/// The summary and cluster configuration are taken from the job itself.
/// </summary>
/// <param name="job">Job being diagnosed.</param>
/// <param name="plan">Static plan of the job.</param>
/// <param name="manager">Communication manager.</param>
protected FailureDiagnosis(DryadLinqJobInfo job, DryadJobStaticPlan plan, CommManager manager)
{
    Job = job;
    StaticPlan = plan;
    Manager = manager;

    // Derived from the job rather than passed in separately.
    Summary = job.Summary;
    cluster = job.ClusterConfiguration;
}
/// <summary>
/// Disconnects the device using its CommManager.
/// </summary>
/// <param name="comm">The device's CommManager.</param>
/// <exception cref="System.ArgumentNullException">The specified CommManager cannot be null.</exception>
internal void Disconnect(CommManager comm)
{
    if (comm == null)
    {
        // Use the (paramName, message) overload: the single-string overload treats
        // its argument as the parameter name, not as the message.
        throw new ArgumentNullException("comm", "The specified CommManager cannot be null.");
    }

    comm.Disconnect();
    // Note: the parameter is passed by value, so nulling it here would not affect
    // the caller's reference; the former "comm = null" assignment was a no-op.
}
/// <summary>
/// Check whether the failed (non-manager) vertices failed while reading from a common set of machines.
/// This is incomplete: e.g., it does not work for tidyfs streams.
/// </summary>
/// <param name="manager">Communication manager.</param>
/// <returns>Yes if there were correlated failures.</returns>
protected Decision LookForCorrelatedReadFailures(CommManager manager)
{
    // if we have more than this many failures we start to worry
    const int maxFailures = 5;

    IEnumerable<ExecutedVertexInstance> failedVertices = this.Job.Vertices
        .Where(v => v.State == ExecutedVertexInstance.VertexState.Failed && !v.IsManager)
        .ToList();
    int failedCount = failedVertices.Count();
    if (failedCount < maxFailures)
    {
        return Decision.No;
    }

    // Collect, per failed vertex, the channel it could not read (if any).
    var unreadableChannels = new List<ChannelEndpointDescription>();
    int processed = 0;
    foreach (ExecutedVertexInstance vertex in failedVertices)
    {
        VertexFailureDiagnosis diagnosis = VertexFailureDiagnosis.CreateVertexFailureDiagnosis(this.Job, this.StaticPlan, vertex, manager);
        ChannelEndpointDescription channel = diagnosis.ChannelReadFailure(manager);
        if (channel != null)
        {
            unreadableChannels.Add(channel);
        }

        processed++;
        manager.Progress(processed * 100 / failedCount);
    }

    if (unreadableChannels.Count < maxFailures)
    {
        return Decision.No;
    }

    this.Log(DiagnosisMessage.Importance.Final, "There are " + unreadableChannels.Count + " read failures in the job", "");

    List<ChannelEndpointDescription> fileChannels = unreadableChannels.Where(ced => ced.UriType == "file").ToList();
    if (fileChannels.Count == 0)
    {
        this.Log(DiagnosisMessage.Importance.Final, "All channels with failures are distributed files", "No further information is available");
        return Decision.Dontknow;
    }

    // Group the failed file channels by the machine named in their UNC path.
    Decision verdict = Decision.Dontknow;
    var byMachine = fileChannels.GroupBy(f => new UNCPathname(f.LocalPath).Machine).ToList();
    foreach (var machineGroup in byMachine)
    {
        int failuresOnMachine = machineGroup.Count();
        if (failuresOnMachine <= 3)
        {
            continue;
        }

        this.Log(DiagnosisMessage.Importance.Final, "There are " + failuresOnMachine + " read failures reading from machine", machineGroup.Key);
        verdict = Decision.Yes;
    }

    return verdict;
}
/// <summary>
/// Builds the Client describing the local ship: entity id and grid id come from
/// <c>comms.ship.Me.Me</c>, the name from the controller block's grid.
/// </summary>
/// <param name="comms">Communication manager the client is attached to.</param>
/// <returns>The newly created client.</returns>
public static Client GenerateMyClient(CommManager comms)
{
    long entityId = comms.ship.Me.Me.EntityId;
    long gridEntityId = comms.ship.Me.Me.CubeGrid.EntityId;

    return new Client(entityId, gridEntityId, comms)
    {
        Name = comms.ship.ControllerBlock.CubeGrid.CustomName
    };
}
/// <summary>
/// Reads a client from the serializer: two longs (id, grid id) followed by a
/// string (name), in that order.
/// </summary>
/// <param name="buf">Serializer to read from.</param>
/// <param name="comms">Communication manager the client is attached to.</param>
/// <returns>The deserialized client.</returns>
public static Client GetFromSerializer(BinarySerializer buf, CommManager comms)
{
    // The read order defines the wire layout and must not change.
    long clientId = buf.ReadLong();
    long gridId = buf.ReadLong();
    string clientName = buf.ReadString();

    var client = new Client(clientId, gridId, comms);
    client.Name = clientName;
    return client;
}
/// <summary>
/// Update the status of a job summary from the job information looked up by job id.
/// The status becomes Unknown when no information is found.
/// </summary>
/// <param name="summary">Summary to refresh.</param>
/// <param name="manager">Communication manager.</param>
public override void RefreshStatus(DryadLinqJobSummary summary, CommManager manager)
{
    ClusterJobInformation found = this.GetJobInfo(summary.JobID);
    if (found != null)
    {
        summary.Status = found.Status;
    }
    else
    {
        summary.Status = ClusterJobInformation.ClusterJobStatus.Unknown;
    }
}
/// <summary>
/// Update the status of a job summary by discovering the corresponding cluster job.
/// The status becomes Unknown when the job cannot be discovered.
/// </summary>
/// <param name="summary">Summary to refresh.</param>
/// <param name="manager">Communication manager.</param>
public virtual void RefreshStatus(DryadLinqJobSummary summary, CommManager manager)
{
    // refresh the whole list: too expensive
    // this.RecomputeClusterJobList(summary.VirtualCluster, manager);
    ClusterJobInformation discovered = this.DiscoverClusterJob(summary, manager);
    if (discovered != null)
    {
        summary.Status = discovered.Status;
    }
    else
    {
        summary.Status = ClusterJobInformation.ClusterJobStatus.Unknown;
    }
}
/// <summary>
/// Collect the job information for the current summary and cluster, and build its static plan.
/// When the information cannot be collected an error status is reported and nothing is assigned.
/// </summary>
/// <param name="manager">Communication manager.</param>
protected void FindJobInfo(CommManager manager)
{
    DryadLinqJobInfo collected = DryadLinqJobInfo.CreateDryadLinqJobInfo(this.cluster, this.Summary, true, manager);
    if (collected != null)
    {
        this.Job = collected;
        this.StaticPlan = JobObjectModel.DryadJobStaticPlan.CreatePlan(collected, manager);
    }
    else
    {
        manager.Status("Cannot collect information for " + Summary.ShortName() + " to diagnose", StatusKind.Error);
    }
}
/// <summary>
/// Resets the CommManager and installs a multi-site test configuration in <c>_config</c>.
/// </summary>
public void setup()
{
    CommManager.reset();

    // Build the settings in a local first; the field is assigned once complete.
    var settings = new Dictionary<string, string>();
    settings.Add("proxyHost", "websenseproxy");
    settings.Add("proxyPort", "8080");
    settings.Add("multiSite", "true");
    settings.Add("printxml", "true");
    settings.Add("printMultiSiteDebug", "false");
    settings.Add("url", Properties.Settings.Default.url);
    _config = settings;
}
/// <summary>
/// Disposes the communication object when connected, then clears the reference to it,
/// the active measurement, and every event/callback member listed below.
/// </summary>
public void Dispose()
{
    // The communication object is only disposed while a connection is reported.
    bool isConnected = Connected;
    if (isConnected)
    {
        _comm.Dispose();
    }
    _comm = null;

    ActiveMeasurement = null;

    // Drop all handlers so that nothing keeps referencing this instance.
    Disconnected = null;
    MeasurementEnded = null;
    MeasurementStarted = null;
    ReceiveStatus = null;
    StateChanged = null;
    SimpleCurveStartReceivingData = null;
}
/// <summary>
/// Factory for VertexFailureDiagnosis objects. As written, no cluster type is handled:
/// after resolving a cache configuration to the actual one, the method always throws.
/// </summary>
/// <param name="job">Job containing the vertex.</param>
/// <param name="plan">Plan of the executed job.</param>
/// <param name="vertex">Vertex to diagnose.</param>
/// <param name="manager">Communication manager.</param>
/// <returns>A subclass of VertexFailureDiagnosis.</returns>
/// <exception cref="InvalidOperationException">The cluster configuration type is not handled.</exception>
public static VertexFailureDiagnosis CreateVertexFailureDiagnosis(DryadLinqJobInfo job, DryadJobStaticPlan plan, ExecutedVertexInstance vertex, CommManager manager)
{
    ClusterConfiguration config = job.ClusterConfiguration;

    // A cache configuration stands in for the real cluster; diagnose against the real one.
    CacheClusterConfiguration cacheConfig = config as CacheClusterConfiguration;
    if (cacheConfig != null)
    {
        config = cacheConfig.ActualConfig(job.Summary);
    }

    throw new InvalidOperationException("Config of type " + config.TypeOfCluster + " not handled");
}
/// <summary>
/// Refresh a cached job summary by delegating to the status object of the actual cluster:
/// first its RefreshStatus, then a job discovery whose result sets the final status
/// (Unknown when nothing is discovered).
/// </summary>
/// <param name="job">Summary to refresh.</param>
/// <param name="manager">Communication manager.</param>
public override void RefreshStatus(DryadLinqJobSummary job, CommManager manager)
{
    CacheClusterConfiguration cacheConfig = this.Config as CacheClusterConfiguration;
    ClusterConfiguration actualConfig = cacheConfig.ActualConfig(job);
    ClusterStatus actualStatus = actualConfig.CreateClusterStatus();

    actualStatus.RefreshStatus(job, manager);

    ClusterJobInformation discovered = actualStatus.DiscoverClusterJob(job, manager);
    if (discovered != null)
    {
        job.Status = discovered.Status;
    }
    else
    {
        job.Status = ClusterJobInformation.ClusterJobStatus.Unknown;
    }
}
/// <summary>
/// Verifies that the URL is switched when the number of hours since the last
/// site switch exceeds the configured "maxHoursWithoutSwitch" (4 hours here).
/// </summary>
public void testFindUrl_MultiSiteMaxHours()
{
    // test that url is switched when number of hours since last switch exceeds threshold
    Dictionary<string, string> _config = new Dictionary<string, string>
    {
        { "proxyHost", "websenseproxy" },
        { "proxyPort", "8080" },
        { "printxml", "false" },
        { "merchantId", "101" },
        { "username", "DOTNET" },
        { "password", "TESTCASE" },
        { "multiSite", "true" },
        { "printMultiSiteDebug", "true" },
        { "url", legacyUrl },
        { "multiSiteUrl1", site1Url },
        { "multiSiteUrl2", site2Url },
        { "multiSiteErrorThreshold", "3" },
        { "maxHoursWithoutSwitch", "4" }
    };

    CommManager.reset();
    CommManager cmg = CommManager.instance(_config);
    Assert.IsNotNull(cmg);
    Assert.IsTrue(cmg.getMultiSite());
    // Expected value goes first so that a failure message labels expected/actual correctly.
    Assert.AreEqual(3, cmg.getMultiSiteThreshold());

    RequestTarget rt1 = cmg.findUrl();
    Assert.AreEqual(cmg.getMultiSiteUrls()[cmg.getCurrentMultiSiteUrlIndex()], rt1.getUrl());
    cmg.reportResult(rt1, CommManager.REQUEST_RESULT_RESPONSE_RECEIVED, 200);

    // A successful response must not trigger a switch.
    RequestTarget rt2 = cmg.findUrl();
    Assert.AreEqual(rt1.getUrl(), rt2.getUrl());
    cmg.reportResult(rt2, CommManager.REQUEST_RESULT_RESPONSE_RECEIVED, 200);

    // set last switch time to 6 hours earlier
    // (the test converts the stored value to and from ticks with TicksPerMillisecond)
    DateTime lastSwitch = new DateTime(cmg.getLastSiteSwitchTime() * TimeSpan.TicksPerMillisecond);
    lastSwitch = lastSwitch.Add(new System.TimeSpan(-6, 0, 0));
    cmg.setLastSiteSwitchTime(lastSwitch.Ticks / TimeSpan.TicksPerMillisecond);

    RequestTarget rt3 = cmg.findUrl();
    Assert.IsFalse(rt3.getUrl().Equals(rt1.getUrl()));
}
/// <summary>
/// Verifies that the URL stays the same while timeouts accumulate and is switched
/// once the error count reaches the configured "multiSiteErrorThreshold" (3 here).
/// </summary>
public void testFindUrl_MultiSite2()
{
    // test that url is switched when errors reach threshold
    Dictionary<string, string> _config = new Dictionary<string, string>
    {
        { "proxyHost", "websenseproxy" },
        { "proxyPort", "8080" },
        { "printxml", "false" },
        { "merchantId", "101" },
        { "username", "DOTNET" },
        { "password", "TESTCASE" },
        { "multiSite", "true" },
        { "printMultiSiteDebug", "false" },
        { "url", legacyUrl },
        { "multiSiteUrl1", site1Url },
        { "multiSiteUrl2", site2Url },
        { "multiSiteErrorThreshold", "3" },
        { "maxHoursWithoutSwitch", "48" }
    };

    CommManager.reset();
    CommManager cmg = CommManager.instance(_config);
    Assert.IsNotNull(cmg);
    Assert.IsTrue(cmg.getMultiSite());
    // Expected value goes first so that a failure message labels expected/actual correctly.
    Assert.AreEqual(3, cmg.getMultiSiteThreshold());

    RequestTarget rt1 = cmg.findUrl();
    Assert.AreEqual(cmg.getMultiSiteUrls()[cmg.getCurrentMultiSiteUrlIndex()], rt1.getUrl());
    cmg.reportResult(rt1, CommManager.REQUEST_RESULT_RESPONSE_TIMEOUT, 0);

    RequestTarget rt2 = cmg.findUrl();
    Assert.AreEqual(rt1.getUrl(), rt2.getUrl());
    cmg.reportResult(rt2, CommManager.REQUEST_RESULT_RESPONSE_TIMEOUT, 0);

    RequestTarget rt3 = cmg.findUrl();
    Assert.AreEqual(rt1.getUrl(), rt3.getUrl());
    cmg.reportResult(rt3, CommManager.REQUEST_RESULT_RESPONSE_TIMEOUT, 0);
    Assert.AreEqual(3, cmg.getErrorCount());

    // Third timeout reached the threshold: the next target must be a different URL.
    RequestTarget rt4 = cmg.findUrl();
    Assert.IsFalse(rt4.getUrl().Equals(rt1.getUrl()));
}
/// <summary>
/// Detect whether the vertex failed while reading one of its input channels, by parsing
/// the channel port number out of the first line of the vertex stack trace.
/// </summary>
/// <param name="manager">Communication manager.</param>
/// <returns>The channel that cannot be read, or null if that's not the problem.</returns>
public virtual ChannelEndpointDescription ChannelReadFailure(CommManager manager)
{
    const string readFailurePattern = @"(.*)Exception: (.*)ailed to read from input channel at port (\d+)";

    List<string> traceLines = this.StackTrace().ToList();
    if (traceLines.Count == 0)
    {
        return null;
    }

    Match match = new Regex(readFailurePattern).Match(traceLines.First());
    if (!match.Success)
    {
        return null;
    }

    int channelNo;
    if (!int.TryParse(match.Groups[3].Value, out channelNo))
    {
        return null;
    }

    try
    {
        this.Vertex.DiscoverChannels(true, false, true, manager);
        var inputs = this.Vertex.InputChannels;
        if (inputs == null)
        {
            return null;
        }

        // NOTE(review): when channelNo equals inputs.Count the lookup below may fail and be
        // swallowed by the catch without logging - confirm whether this bound should be "<=".
        if (inputs.Count < channelNo)
        {
            this.Log(DiagnosisMessage.Importance.Error, "Could not discover channel " + channelNo, this.VertexName);
            return null;
        }

        return inputs[channelNo];
    }
    catch (Exception)
    {
        // Any failure during discovery or lookup is treated as "not a channel read failure".
        return null;
    }
}
/// <summary>
/// Runs the CommManager business loop until it completes or is cancelled through
/// <c>solarcar_cancel</c>; the main window controller is closed afterwards in all cases.
/// </summary>
public async Task SolarCarMain()
{
    try
    {
        using (var car = new CommManager())
        {
            var token = this.solarcar_cancel.Token;
            await car.BusinessLoop(token);
        }
    }
    catch (Exception failure)
    {
        // Failures are only traced; they do not propagate to the caller.
        Debug.WriteLine("PROGRAM: EXCEPTION: " + failure.ToString());
    }
    finally
    {
        this.mainWindowController.Close();
    }
}
/// <summary>
/// Application entry point: installs the unhandled-exception handlers, starts the command
/// processor, loads the embedded UI style, runs the main form and, once it closes, stops
/// the communication manager and the command processor.
/// </summary>
static void Main()
{
    // Catch unhandled thread exceptions.
    Application.ThreadException += new System.Threading.ThreadExceptionEventHandler(Application_ThreadException);
    AppDomain.CurrentDomain.UnhandledException += CurrentDomain_UnhandledException;

    CommandProcessor.instance().Start(false);

    Application.EnableVisualStyles();
    Application.SetCompatibleTextRenderingDefault(false);

    // Load the skin from the embedded resource stream.
    Infragistics.Win.AppStyling.StyleManager.Load(
        System.Reflection.Assembly.GetExecutingAssembly().GetManifestResourceStream("SystemMon.Skin.MyElectricBlue.isl"));

    // Blocks until the main form is closed.
    Application.Run(new Form_Main());

    CommManager.instance().Stop();
    CommandProcessor.instance().Stop();
}
/// <summary>
/// Resets the CommManager and creates the CnpOnline client under test with a
/// single-site configuration (credentials and proxy come from the project settings).
/// </summary>
public void SetUp()
{
    CommManager.reset();

    var settings = new Dictionary<string, string>
    {
        { "url", "https://payments.vantivprelive.com/vap/communicator/online" },
        { "reportGroup", "Default Report Group" },
        { "username", Properties.Settings.Default.username },
        { "timeout", "500" },
        { "merchantId", Properties.Settings.Default.merchantId },
        { "password", Properties.Settings.Default.password },
        { "printxml", "true" },
        { "logFile", null },
        { "neuterAccountNums", null },
        { "proxyHost", Properties.Settings.Default.proxyHost },
        { "proxyPort", Properties.Settings.Default.proxyPort },
        { "multiSite", "false" }
    };

    cnp = new CnpOnline(settings);
}
/// <summary>
/// Stops the management service: stops the comm manager (if any), deletes the
/// process lock file and traces the shutdown.
/// </summary>
public void StopService()
{
    StopCommManagerIfRunning();

    // delete process lock file
    File.Delete(lockFile);

    Messages.Trace("stopped service");
}

/// <summary>
/// Stops and releases the comm manager when one exists; a thread abort raised
/// while stopping is recorded through Messages.StackTrace.
/// </summary>
private void StopCommManagerIfRunning()
{
    if (commManager == null)
    {
        return;
    }

    try
    {
        commManager.Stop();
        commManager = null;
    }
    catch (ThreadAbortException tae)
    {
        Messages.StackTrace(tae, false);
    }
}
/// <summary>
/// Starts the management service: creates and starts the comm manager, writes the
/// current process id to the lock file and marks startup as completed.
/// </summary>
public void StartService()
{
    commManager = new CommManager();
    commManager.Start();

    // Create the service lock file and write this process's PID to it.
    // The using blocks guarantee that the stream, the writer and the process handle
    // are released even if writing fails; disposing the writer also flushes it.
    using (FileStream lockStream = File.Open(lockFile, FileMode.Create))
    using (StreamWriter writer = new StreamWriter(lockStream))
    using (Process current = Process.GetCurrentProcess())
    {
        writer.WriteLine(current.Id);
    }

    HasCompletedStartup = true;
    Messages.Trace("started service");
}
/// <summary>
/// Resets the CommManager, installs the standard test configuration (5000 timeout)
/// in <c>_config</c> and creates the CnpOnline client from it.
/// </summary>
public void SetUpCnp()
{
    CommManager.reset();

    // Build the settings in a local first; the field is assigned once complete.
    var settings = new Dictionary<string, string>();
    settings.Add("url", Properties.Settings.Default.url);
    settings.Add("reportGroup", "Default Report Group");
    settings.Add("username", "DOTNET");
    settings.Add("version", "11.0");
    settings.Add("timeout", "5000");
    settings.Add("merchantId", "101");
    settings.Add("password", "TESTCASE");
    settings.Add("printxml", "true");
    settings.Add("proxyHost", Properties.Settings.Default.proxyHost);
    settings.Add("proxyPort", Properties.Settings.Default.proxyPort);
    settings.Add("logFile", Properties.Settings.Default.logFile);
    settings.Add("neuterAccountNums", "true");
    _config = settings;

    _cnp = new CnpOnline(_config);
}
/// <summary>
/// Initialize a job failure diagnosis from a cluster configuration and a job summary.
/// The job manager vertex is recorded only when the job information is available.
/// </summary>
/// <param name="config">Cluster where job resides.</param>
/// <param name="summary">Job summary.</param>
/// <param name="manager">Communication manager.</param>
protected JobFailureDiagnosis(ClusterConfiguration config, DryadLinqJobSummary summary, CommManager manager)
    : base(config, summary, manager)
{
    this.diagnosisLog = new DiagnosisLog(this.Job, summary);

    if (this.Job == null)
    {
        return;
    }

    this.jobManager = this.Job.ManagerVertex;
}
/// <summary>
/// Initialize a job failure diagnosis from an already-collected job and its plan.
/// </summary>
/// <param name="job">Job to diagnose.</param>
/// <param name="plan">Plan of the diagnosed job.</param>
/// <param name="manager">Communication manager.</param>
protected JobFailureDiagnosis(DryadLinqJobInfo job, DryadJobStaticPlan plan, CommManager manager)
    : base(job, plan, manager)
{
    var summary = job.Summary;
    this.diagnosisLog = new DiagnosisLog(job, summary);

    // The manager vertex is read through the Job property set up by the base constructor call.
    this.jobManager = this.Job.ManagerVertex;
}
/// <summary>
/// Rebuild the clusterJobs field from the job summaries stored as XML files
/// in the "jobs" sub-folder of the cache directory.
/// </summary>
/// <param name="virtualCluster">Unused.</param>
/// <param name="manager">Communication manager; supplies the cancellation token and receives status and progress.</param>
protected override void RecomputeClusterJobList(string virtualCluster, CommManager manager)
{
    this.clusterJobs = new Dictionary<string, ClusterJobInformation>();

    if (string.IsNullOrEmpty(CachedClusterResidentObject.CacheDirectory))
    {
        return;
    }

    string jobsFolder = Path.Combine(CachedClusterResidentObject.CacheDirectory, "jobs");
    if (!Directory.Exists(jobsFolder))
    {
        Directory.CreateDirectory(jobsFolder);
    }

    foreach (string summaryFile in Directory.GetFiles(jobsFolder, "*.xml"))
    {
        manager.Token.ThrowIfCancellationRequested();

        DryadLinqJobSummary summary = Utilities.LoadXml<DryadLinqJobSummary>(summaryFile);

        // there may be two jobs with same id from different clusters
        string cacheKey = summary.Cluster + "-" + summary.ClusterJobId;

        ClusterJobInformation cached = new ClusterJobInformation(
            this.Config.Name,
            summary.Cluster,
            cacheKey,
            summary.Name,
            summary.User,
            summary.Date,
            summary.EndTime - summary.Date,
            summary.Status);
        cached.SetAssociatedSummary(summary);

        if (!this.clusterJobs.ContainsKey(cacheKey))
        {
            this.clusterJobs.Add(cacheKey, cached);
        }
        else
        {
            manager.Status("Duplicate job id, cannot insert in cache " + summary.AsIdentifyingString(), StatusKind.Error);
        }
    }

    manager.Progress(100);
}
/// <summary>
/// Try to cancel each of the given jobs, reporting the outcome of every attempt
/// through the communication manager.
/// </summary>
/// <param name="jobs">Jobs to cancel.</param>
/// <param name="cluster">Cluster where the jobs are running.</param>
/// <param name="manager">Communication manager.</param>
/// <returns>True if all cancellations succeed.</returns>
// ReSharper disable once UnusedParameter.Global
public static bool CancelJobs(IEnumerable<DryadLinqJobSummary> jobs, ClusterStatus cluster, CommManager manager)
{
    bool allCancelled = true;

    foreach (DryadLinqJobSummary candidate in jobs)
    {
        manager.Token.ThrowIfCancellationRequested();

        bool notRunning = candidate.Status != ClusterJobInformation.ClusterJobStatus.Running;
        if (notRunning)
        {
            manager.Status("Job " + candidate.Name + " does not appear to be running; will still try to cancel", StatusKind.Error);
        }

        string failureReason = "";
        bool cancelled;
        try
        {
            cancelled = cluster.CancelJob(candidate);
        }
        catch (Exception ex)
        {
            // A failing cancellation does not abort the loop; it is recorded and reported below.
            cancelled = false;
            failureReason = ex.Message;
            Trace.TraceInformation(ex.ToString());
        }

        if (!cancelled)
        {
            manager.Status("Cancellation of " + candidate.Name + " failed " + failureReason, StatusKind.Error);
            allCancelled = false;
        }
        else
        {
            manager.Status("Job " + candidate.Name + " cancelled", StatusKind.OK);
        }
    }

    return allCancelled;
}
/// <summary>
/// Start the job browser from a job summary. Does nothing when the summary is null;
/// reports an error status when no job information can be found.
/// </summary>
/// <param name="js">Job summary to browse.</param>
private void browseFromJobSummary(DryadLinqJobSummary js)
{
    if (js == null)
    {
        return;
    }

    // TODO: this should run in the background
    // This operation is never cancelled, so CancellationToken.None is used rather than
    // allocating a CancellationTokenSource that nobody keeps or disposes.
    CommManager manager = new CommManager(this.Status, delegate { }, System.Threading.CancellationToken.None);
    DryadLinqJobInfo job = DryadLinqJobInfo.CreateDryadLinqJobInfo(this.clusterStatus.Config, js, false, manager);
    if (job != null)
    {
        JobBrowser browser = new JobBrowser(job);
        browser.Show();
        this.Status("OK", StatusKind.OK);
    }
    else
    {
        this.Status("Could not find information about job", StatusKind.Error);
    }
}
/// <summary>
/// Check whether the failed (non-manager) vertices failed while reading from a common set of machines.
/// This is incomplete: e.g., it does not work for tidyfs streams.
/// </summary>
/// <param name="manager">Communication manager.</param>
/// <returns>Yes if there were correlated failures.</returns>
protected Decision LookForCorrelatedReadFailures(CommManager manager)
{
    // if we have more than this many failures we start to worry
    const int maxFailures = 5;

    IEnumerable<ExecutedVertexInstance> failedVertices = this.Job.Vertices
        .Where(v => v.State == ExecutedVertexInstance.VertexState.Failed && !v.IsManager)
        .ToList();
    int failedCount = failedVertices.Count();
    if (failedCount < maxFailures)
    {
        return Decision.No;
    }

    // Collect, per failed vertex, the channel it could not read (if any).
    var unreadableChannels = new List<ChannelEndpointDescription>();
    int processed = 0;
    foreach (ExecutedVertexInstance vertex in failedVertices)
    {
        VertexFailureDiagnosis diagnosis = VertexFailureDiagnosis.CreateVertexFailureDiagnosis(this.Job, this.StaticPlan, vertex, manager);
        ChannelEndpointDescription channel = diagnosis.ChannelReadFailure(manager);
        if (channel != null)
        {
            unreadableChannels.Add(channel);
        }

        processed++;
        manager.Progress(processed * 100 / failedCount);
    }

    if (unreadableChannels.Count < maxFailures)
    {
        return Decision.No;
    }

    this.Log(DiagnosisMessage.Importance.Final, "There are " + unreadableChannels.Count + " read failures in the job", "");

    List<ChannelEndpointDescription> fileChannels = unreadableChannels.Where(ced => ced.UriType == "file").ToList();
    if (fileChannels.Count == 0)
    {
        this.Log(DiagnosisMessage.Importance.Final, "All channels with failures are distributed files", "No further information is available");
        return Decision.Dontknow;
    }

    // Group the failed file channels by the machine named in their UNC path.
    Decision verdict = Decision.Dontknow;
    var byMachine = fileChannels.GroupBy(f => new UNCPathname(f.LocalPath).Machine).ToList();
    foreach (var machineGroup in byMachine)
    {
        int failuresOnMachine = machineGroup.Count();
        if (failuresOnMachine <= 3)
        {
            continue;
        }

        this.Log(DiagnosisMessage.Importance.Final, "There are " + failuresOnMachine + " read failures reading from machine", machineGroup.Key);
        verdict = Decision.Yes;
    }

    return verdict;
}
/// <summary>
/// Rebuild the cluster job list from the sub-directories of the jobs folder on the DFS.
/// </summary>
/// <param name="virtualCluster">Virtual cluster to use (defined only for some cluster types).</param>
/// <param name="manager">Communication manager; supplies the cancellation token and receives progress.</param>
// ReSharper disable once UnusedParameter.Global
protected override void RecomputeClusterJobList(string virtualCluster, CommManager manager)
{
    this.clusterJobs = new Dictionary<string, ClusterJobInformation>();

    var jobsRoot = DfsFile.UriFromPath(this.config.JobsFolderUri, "");
    var jobFolders = this.config.DfsClient.EnumerateSubdirectories(jobsRoot).ToList();

    for (int index = 0; index < jobFolders.Count; index++)
    {
        manager.Token.ThrowIfCancellationRequested();

        var folder = jobFolders[index];
        ClusterJobInformation jobInfo = this.GetJobInfo(DfsFile.PathFromUri(this.config.JobsFolderUri, folder));
        if (jobInfo != null)
        {
            // ReSharper disable once AssignNullToNotNullAttribute
            this.clusterJobs.Add(jobInfo.ClusterJobID, jobInfo);
        }

        // Progress is computed from the number of folders completed before this one.
        manager.Progress(100 * index / jobFolders.Count);
    }

    manager.Progress(100);
}
/// <summary>
/// Rebuild the cluster job list from the entries returned by the Azure client for the
/// root of the configured storage; jobs are keyed by the absolute path of their URI.
/// </summary>
/// <param name="virtualCluster">Virtual cluster to use (defined only for some cluster types).</param>
/// <param name="manager">Communication manager; supplies the cancellation token and receives progress.</param>
protected override void RecomputeClusterJobList(string virtualCluster, CommManager manager)
{
    this.clusterJobs = new Dictionary<string, ClusterJobInformation>();

    var rootUri = AzureDfsFile.UriFromPath(this.config, "");
    var jobEntries = this.config.AzureClient.ExpandFileOrDirectory(rootUri).ToList();

    for (int index = 0; index < jobEntries.Count; index++)
    {
        manager.Token.ThrowIfCancellationRequested();

        var entry = jobEntries[index];
        string rootFolder = AzureDfsFile.PathFromUri(this.config, entry);
        ClusterJobInformation jobInfo = this.GetJobInfo(rootFolder);
        if (jobInfo != null)
        {
            // ReSharper disable once AssignNullToNotNullAttribute
            this.clusterJobs.Add(entry.AbsolutePath, jobInfo);
        }

        // Progress is computed from the number of entries completed before this one.
        manager.Progress(100 * index / jobEntries.Count);
    }

    manager.Progress(100);
}
/// <summary>
/// Rebuild the cluster job list from the sub-folders of the configured jobs folder.
/// </summary>
/// <param name="virtualCluster">Virtual cluster to use (defined only for some cluster types).</param>
/// <param name="manager">Communication manager; supplies the cancellation token and receives progress.</param>
protected override void RecomputeClusterJobList(string virtualCluster, CommManager manager)
{
    this.clusterJobs = new Dictionary<string, ClusterJobInformation>();

    string jobsFolder = this.config.JobsFolder;
    if (Directory.Exists(jobsFolder))
    {
        string[] jobFolders = Directory.GetDirectories(this.config.JobsFolder);
        for (int index = 0; index < jobFolders.Length; index++)
        {
            manager.Token.ThrowIfCancellationRequested();

            string folder = jobFolders[index];
            string folderName = Path.GetFileName(folder);
            ClusterJobInformation jobInfo = this.GetJobInfo(folder, folderName);
            if (jobInfo != null)
            {
                // ReSharper disable once AssignNullToNotNullAttribute
                this.clusterJobs.Add(folderName, jobInfo);
            }

            // Progress is computed from the number of folders completed before this one.
            manager.Progress(index * 100 / jobFolders.Length);
        }

        manager.Progress(100);
    }
}
/// <summary>
/// Discover a cluster job by delegating to the status object created from the
/// actual cluster configuration that this cache configuration maps the job to.
/// </summary>
/// <param name="job">Cluster job.</param>
/// <param name="manager">Communication manager.</param>
/// <returns>Whatever the actual cluster's DiscoverClusterJob returns for the job.</returns>
public override ClusterJobInformation DiscoverClusterJob(DryadLinqJobSummary job, CommManager manager)
{
    CacheClusterConfiguration cacheConfig = this.Config as CacheClusterConfiguration;
    ClusterConfiguration actualConfig = cacheConfig.ActualConfig(job);
    ClusterStatus actualStatus = actualConfig.CreateClusterStatus();

    ClusterJobInformation discovered = actualStatus.DiscoverClusterJob(job, manager);
    return discovered;
}
/// <summary>
/// Create a suitable job failure diagnosis object for the job being analyzed.
/// In its current form no cluster type is handled: after resolving a cached
/// configuration to the actual one, the method always throws.
/// </summary>
/// <param name="job">Job to diagnose.</param>
/// <param name="plan">Plan of the job being diagnosed; not used by the current body.</param>
/// <param name="manager">Communication manager; not used by the current body.</param>
/// <returns>Never returns normally in the current implementation.</returns>
/// <exception cref="InvalidOperationException">Always thrown; the message names the cluster type.</exception>
public static JobFailureDiagnosis CreateJobFailureDiagnosis(DryadLinqJobInfo job, DryadJobStaticPlan plan, CommManager manager)
{
    ClusterConfiguration config = job.ClusterConfiguration;

    // A cache configuration stands in for another cluster; report that cluster's type instead.
    var cached = config as CacheClusterConfiguration;
    if (cached != null)
    {
        config = cached.ActualConfig(job.Summary);
    }

    string message = "Configuration of type " + config.TypeOfCluster + " not supported for diagnosis";
    throw new InvalidOperationException(message);
}
/// <summary>
/// Create a suitable job failure diagnosis object for a job known only by its summary.
/// In its current form no cluster type is handled: after resolving a cached
/// configuration to the actual one, the method always throws.
/// </summary>
/// <param name="config">Cluster where job resides.</param>
/// <param name="summary">Job to diagnose.</param>
/// <param name="manager">Communication manager; not used by the current body.</param>
/// <returns>Never returns normally in the current implementation.</returns>
/// <exception cref="InvalidOperationException">Always thrown; the message names the cluster type.</exception>
public static JobFailureDiagnosis CreateJobFailureDiagnosis(ClusterConfiguration config, DryadLinqJobSummary summary, CommManager manager)
{
    // A cache configuration stands in for another cluster; report that cluster's type instead.
    var cached = config as CacheClusterConfiguration;
    ClusterConfiguration effective = cached != null ? cached.ActualConfig(summary) : config;

    string message = "Configuration of type " + effective.TypeOfCluster + " not supported for diagnosis";
    throw new InvalidOperationException(message);
}
/// <summary>
/// Ask the cluster status object for its job list and copy the result into a new list.
/// </summary>
/// <param name="manager">Communication manager.</param>
/// <param name="status">Cluster to scan.</param>
/// <param name="virtualCluster">Virtual cluster selected; defined only for Scope clusters.</param>
/// <returns>A list holding the jobs reported by <paramref name="status"/>.</returns>
private static List<ClusterJobInformation> BuildClusterJobList(CommManager manager, ClusterStatus status, string virtualCluster)
{
    IEnumerable<ClusterJobInformation> reported = status.GetClusterJobList(virtualCluster, manager);
    List<ClusterJobInformation> jobList = reported.ToList();
    return jobList;
}
/// <summary>
/// Create a failure diagnosis when the job info is not yet known.
/// The arguments are stored and then <c>FindJobInfo</c> is invoked.
/// </summary>
/// <param name="config">Cluster where job resides.</param>
/// <param name="summary">Job summary.</param>
/// <param name="manager">Communication manager.</param>
protected FailureDiagnosis(ClusterConfiguration config, DryadLinqJobSummary summary, CommManager manager)
{
    this.Manager = manager;
    this.Summary = summary;
    this.cluster = config;

    // Runs last, after all constructor arguments have been stored.
    this.FindJobInfo(manager);
}
/// <summary>
/// Open a job browser window for every selected job whose detailed information can be created.
/// </summary>
/// <param name="sender">Unused.</param>
/// <param name="e">Unused.</param>
private void jobBrowserToolStripMenuItem_Click(object sender, EventArgs e)
{
    IEnumerable<ClusterJobInformation> selection = this.SelectedJobs();
    this.Status("Starting job browser...", StatusKind.LongOp);

    // All summaries are resolved before the first browser is opened.
    List<DryadLinqJobSummary> summaries = selection
        .Select(selected => selected.DiscoverDryadLinqJob(this.clusterStatus, this.Status))
        .ToList();

    // No progress callback is needed here, and the token comes from a source that nothing else references.
    var manager = new CommManager(this.Status, delegate { }, new System.Threading.CancellationTokenSource().Token);

    // Detailed info is created one job at a time, immediately before its browser is shown.
    foreach (DryadLinqJobSummary summary in summaries)
    {
        DryadLinqJobInfo info = DryadLinqJobInfo.CreateDryadLinqJobInfo(this.clusterStatus.Config, summary, false, manager);
        if (info != null)
        {
            JobBrowser browser = new JobBrowser(info);
            browser.Show();
        }
    }

    this.Status("OK", StatusKind.OK);
}
// ReSharper restore UnusedParameter.Global
/// <summary>
/// Discover a cluster job given its id.
/// The cluster job list is computed first if it has not been computed yet.
/// </summary>
/// <param name="job">Job to discover; its cluster job id is the lookup key.</param>
/// <param name="manager">Communication manager.</param>
/// <returns>The cluster job, or null if not found.</returns>
public virtual ClusterJobInformation DiscoverClusterJob(DryadLinqJobSummary job, CommManager manager)
{
    if (this.clusterJobs == null)
    {
        this.RecomputeClusterJobList(job.VirtualCluster, manager);
    }

    // TryGetValue rather than the indexer: an unknown id must yield null, as documented,
    // instead of a KeyNotFoundException.
    ClusterJobInformation found;
    return this.clusterJobs.TryGetValue(job.ClusterJobId, out found) ? found : null;
}
/// <summary>
/// Diagnose a list of jobs, one after the other, skipping null entries.
/// </summary>
/// <param name="jobs">Jobs to diagnose.</param>
/// <param name="config">Cluster configuration.</param>
/// <param name="manager">Communication manager; supplies the cancellation token and receives status and progress updates.</param>
/// <returns>One diagnosis log per non-null job, in input order.</returns>
public static List<DiagnosisLog> DiagnoseJobs(IEnumerable<DryadLinqJobSummary> jobs, ClusterConfiguration config, CommManager manager)
{
    // Materialize once so the sequence is enumerated a single time and its size is known.
    DryadLinqJobSummary[] summaries = jobs as DryadLinqJobSummary[] ?? jobs.ToArray();
    int total = summaries.Length;

    var logs = new List<DiagnosisLog>();
    int completed = 0;
    foreach (DryadLinqJobSummary summary in summaries)
    {
        if (summary != null)
        {
            manager.Token.ThrowIfCancellationRequested();

            JobFailureDiagnosis diagnosis = JobFailureDiagnosis.CreateJobFailureDiagnosis(config, summary, manager);
            manager.Status("Diagnosing " + summary.ShortName(), StatusKind.LongOp);
            logs.Add(diagnosis.Diagnose());

            // Null entries do not count as completed, but they are included in the total.
            completed++;
            manager.Progress(completed * 100 / total);
        }
    }

    manager.Status("Diagnosis complete", StatusKind.OK);
    return logs;
}
/// <summary>
/// Force the recomputation of the cluster job list.
/// Declared abstract: each concrete cluster status type supplies its own implementation.
/// </summary>
/// <param name="virtualCluster">Virtual cluster to use (defined only for some cluster types).</param>
/// <param name="manager">Communication manager.</param>
// ReSharper disable once UnusedParameter.Global
protected abstract void RecomputeClusterJobList(string virtualCluster, CommManager manager);
/// <summary>
/// Create an object that diagnoses the problems of a single vertex.
/// </summary>
/// <param name="job">Job containing the vertex.</param>
/// <param name="plan">Plan of the executed job; passed on to the base constructor.</param>
/// <param name="vertex">Vertex to diagnose.</param>
/// <param name="manager">Communication manager; passed on to the base constructor.</param>
protected VertexFailureDiagnosis(DryadLinqJobInfo job, DryadJobStaticPlan plan, ExecutedVertexInstance vertex, CommManager manager)
    : base(job, plan, manager)
{
    // Name assigned to the stack trace file field for every vertex diagnosis.
    const string defaultStackTraceFile = "dryadLinqStackTrace.txt";

    this.Job = job;
    this.Vertex = vertex;
    // ReSharper disable once DoNotCallOverridableMethodsInConstructor
    this.stackTraceFile = defaultStackTraceFile;
}
/// <summary>
/// Cache the vertices in the list; executed on the background thread.
/// </summary>
/// <param name="config">Cluster configuration.</param>
/// <param name="summary">Job to cache.</param>
/// <param name="vertices">Vertices to cache.</param>
/// <param name="manager">Communication manager; receives status and progress updates.</param>
/// <returns>True: success.</returns>
private static bool CacheAllVertices(
    ClusterConfiguration config,
    DryadLinqJobSummary summary,
    List<ExecutedVertexInstance> vertices,
    CommManager manager)
{
    int done = 0;
    int todo = vertices.Count;
    int files = 0;

    manager.Status("Caching data for " + todo + " vertices", StatusKind.LongOp);
    foreach (ExecutedVertexInstance v in vertices)
    {
        files += CacheVertexInfo(config, summary, v);
        done++;

        // Report a percentage. Multiply before dividing: with integer division,
        // done / todo is 0 for every vertex except the last.
        manager.Progress(done * 100 / todo);
    }

    manager.Progress(100);
    manager.Status("Cached " + files + " files", StatusKind.OK);
    return true;
}
/// <summary>
/// Create a VertexFailureDiagnosis of the appropriate type.
/// In its current form no cluster type is handled: after resolving a cached
/// configuration to the actual one, the method always throws.
/// </summary>
/// <param name="job">Job containing the vertex.</param>
/// <param name="plan">Plan of the executed job; not used by the current body.</param>
/// <param name="vertex">Vertex to diagnose; not used by the current body.</param>
/// <param name="manager">Communication manager; not used by the current body.</param>
/// <returns>Never returns normally in the current implementation.</returns>
/// <exception cref="InvalidOperationException">Always thrown; the message names the cluster type.</exception>
public static VertexFailureDiagnosis CreateVertexFailureDiagnosis(DryadLinqJobInfo job, DryadJobStaticPlan plan, ExecutedVertexInstance vertex, CommManager manager)
{
    ClusterConfiguration config = job.ClusterConfiguration;

    // A cache configuration stands in for another cluster; report that cluster's type instead.
    var cached = config as CacheClusterConfiguration;
    if (cached != null)
    {
        config = cached.ActualConfig(job.Summary);
    }

    string message = "Config of type " + config.TypeOfCluster + " not handled";
    throw new InvalidOperationException(message);
}
/// <summary>
/// Detect whether the vertex had problems reading a particular channel, based on
/// the first line of its stack trace.
/// </summary>
/// <param name="manager">Communication manager; used when discovering the vertex channels.</param>
/// <returns>The channel that cannot be read, or null if that's not the problem (or the channel cannot be determined).</returns>
public virtual ChannelEndpointDescription ChannelReadFailure(CommManager manager)
{
    List<string> trace = this.StackTrace().ToList();
    if (trace.Count == 0)
    {
        return null;
    }

    // The third capture group holds the number that follows "at port".
    var pattern = new Regex(@"(.*)Exception: (.*)ailed to read from input channel at port (\d+)");
    Match match = pattern.Match(trace[0]);
    if (!match.Success)
    {
        return null;
    }

    int channelNo;
    if (!int.TryParse(match.Groups[3].Value, out channelNo))
    {
        return null;
    }

    try
    {
        this.Vertex.DiscoverChannels(true, false, true, manager);
        var channels = this.Vertex.InputChannels;
        if (channels == null)
        {
            return null;
        }

        // NOTE(review): a channelNo equal to channels.Count passes this check and is still looked up;
        // confirm whether the bound is meant to be inclusive.
        if (channels.Count >= channelNo)
        {
            return channels[channelNo];
        }

        this.Log(DiagnosisMessage.Importance.Error, "Could not discover channel " + channelNo, this.VertexName);
        return null;
    }
    catch (Exception)
    {
        // Best effort: any failure while discovering or looking up channels means "no channel identified".
        return null;
    }
}
/// <summary>
/// Recompute the list of jobs on the cluster and return a copy of it.
/// </summary>
/// <param name="virtualCluster">Virtual cluster selected; defined only for Scope clusters.</param>
/// <param name="manager">Communication manager.</param>
/// <returns>A new list holding the job information entries known after the recomputation.</returns>
public IEnumerable<ClusterJobInformation> GetClusterJobList(string virtualCluster, CommManager manager)
{
    // The list is recomputed on every call.
    this.RecomputeClusterJobList(virtualCluster, manager);

    // Copy the values so the caller gets a list separate from the internal table.
    var snapshot = new List<ClusterJobInformation>(this.clusterJobs.Values);
    return snapshot;
}