private void _managedOneTask(object domainTask)
{
    crawlerDomainTask taskToRun = (crawlerDomainTask)domainTask;
    try
    {
        // register the task as running, execute the domain crawl and move it to the done set
        items.running.Add(taskToRun);
        taskToRun.start();
        Thread.Sleep(50);

        logger.log("[" + taskToRun.wProfile.domain + "] finished [tl:" + taskToRun.targetLoaded + "][td:" + taskToRun.targetDetected + "]");
        items.done.AddUnique(taskToRun);
    }
    catch (Exception ex)
    {
        // on failure the task is still marked as done, the error is persisted and the task is aborted
        items.done.AddUnique(taskToRun);
        crawlerErrorLog clog = crawlerErrorLog.CreateAndSave(ex, taskToRun.wRecord, taskToRun, crawlerErrorEnum.domainOneTaskError);
        aceLog.log("[" + taskToRun.wProfile.domain + "] Thread crashed [" + ex.Message + ":" + ex.GetType().Name + "]");
        taskToRun.CallAbort(cancelTokens[taskToRun]);
    }
}
/// <summary>
/// Auto parallel execution --- the .NET framework (Parallel.ForEach) handles the thread pool and scheduling.
/// </summary>
private void _autoParallel()
{
    Parallel.ForEach<crawlerDomainTask>(items.items, taskToRun =>
    {
        string tskName = taskToRun.wProfile.domain;
        try
        {
            Thread.Sleep(50);
            //changeTaskRunning();
            aceLog.log("[" + taskToRun.wProfile.domain + "] started on [id:" + Thread.CurrentThread.ManagedThreadId + "]");

            items.running.Add(taskToRun);
            taskToRun.start();
            Thread.Sleep(50);

            crawlerDomainTask taskToRemove = null;
            //items.running.TryTake(out taskToRemove);

            logger.log("[" + taskToRun.wProfile.domain + "] finished [tl:" + taskToRun.targetLoaded + "][td:" + taskToRun.targetDetected + "]");
            items.done.AddUnique(taskToRun);
            //changeTaskDone();
            //changeTaskRunning(false);
        }
        catch (Exception ex)
        {
            taskToRun.isStageAborted = true;
            aceLog.log("[" + tskName + "] Thread crashed [" + ex.Message + ":" + ex.GetType().Name + "]");
        }
    });

    allTaskDone = true;
}
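// A minimal sketch (not part of the original engine, an assumption only) of how the auto mode could cap its
// concurrency instead of leaving the degree of parallelism entirely to the thread pool; it reuses the
// `maxThreads` field from the managed mode below and the standard ParallelOptions type:
//
//    private void _autoParallelBounded()
//    {
//        var options = new ParallelOptions() { MaxDegreeOfParallelism = maxThreads };
//        Parallel.ForEach(items.items, options, taskToRun =>
//        {
//            items.running.Add(taskToRun);
//            taskToRun.start();
//            items.done.AddUnique(taskToRun);
//        });
//        allTaskDone = true;
//    }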
protected void deploy(Exception __axe, crawlerErrorEnum __errorType, string __specificMessage, object __relevantInstance, IModelRecord __relevantRecord, crawlerDomainTask __task = null, int stacks = 1)
{
    specificMessage = __specificMessage;
    relevantRecord = __relevantRecord;

    // fall back to the relevant instance if it carries the record itself
    if (relevantRecord == null)
    {
        if (__relevantInstance is IModelRecord)
        {
            relevantRecord = (IModelRecord)__relevantInstance;
        }
    }

    if (__errorType == crawlerErrorEnum.none)
    {
        __errorType = crawlerErrorEnum.exceptionError;
    }

    if (__axe != null)
    {
        if (__axe is aceGeneralException)
        {
            axe = (aceGeneralException)__axe;
            axe.SetLogSerializable(this);
        }
        else
        {
            // wrap a plain exception into aceGeneralException, flagging probable thread-cancellation errors
            if (__axe.Message.Contains("thread"))
            {
                isThreadCancelError = true;
            }
            axe = new aceGeneralException(__axe.Message, __axe, __relevantInstance, "Crawler error: " + __errorType.ToString(), stacks + 2);
            axe.SetLogSerializable(this);
        }
    }

    if (__relevantRecord != null)
    {
        relevantRecord = __relevantRecord;
        // relevantRecord.SetLogSerializable(this);
    }

    if (__task != null)
    {
        task = __task;
        __task.SetLogSerializable(this);
        __errorType |= crawlerErrorEnum.domainTaskError;
    }
}
/// <summary>
/// Here the webSiteProfiles are used to create the crawlerDomainTask instances and, later, their wRecords
/// </summary>
/// <param name="__tRecord">The spider test record.</param>
/// <param name="__sample">The sample of web site profiles to crawl.</param>
/// <param name="__parent">The parent crawler domain task machine.</param>
public crawlerDomainTaskCollection(modelSpiderTestRecord __tRecord, List<webSiteProfile> __sample, crawlerDomainTaskMachine __parent)
{
    sampleSize = __sample.Count();
    tRecord = __tRecord;
    parent = __parent;

    foreach (webSiteProfile profile in __sample)
    {
        //var crawlerContext = tRecord.aRecord.crawledContextGlobalRegister.GetContext(profile.domain, tRecord.aRecord.sciProject.mainWebCrawler.mainSettings, profile, tRecord.aRecord.testRunStamp);
        var task = new crawlerDomainTask(profile, this);
        items.Enqueue(task);
    }
}
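// Usage sketch (illustrative only; `tRecord`, `sample` and `machine` are assumed to be available in the
// calling scope and to have the types named in the constructor signature above):
//
//    var collection = new crawlerDomainTaskCollection(tRecord, sample, machine);
//    // every webSiteProfile in `sample` is now wrapped into a crawlerDomainTask and queued in collection.items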
public crawlerErrorLog(Exception __axe, IModelRecord __relevantRecord = null, crawlerDomainTask __task = null, crawlerErrorEnum __errorType = crawlerErrorEnum.none, int stacks = 1)
{
    deploy(__axe, __errorType, "", __task, __relevantRecord, __task, stacks + 1);
}
public static crawlerErrorLog CreateAndSave(Exception __axe, IModelRecord __relevantRecord = null, crawlerDomainTask __task = null, crawlerErrorEnum __errorType = crawlerErrorEnum.none)
{
    crawlerErrorLog output = new crawlerErrorLog(__axe, __relevantRecord, __task, __errorType, 1);

    if (FirstError == null)
    {
        FirstError = output;
    }

    Console.WriteLine("Error [" + output.Title + "]");
    // Console.WriteLine("Message [" + output.Message + "]");
    // Console.WriteLine("Spec.msg. [" + output.specificMessage + "]");

    string path = output.SaveXML();
    Console.WriteLine("Crawler error log saved to: " + path);

    if (FirstError == output)
    {
        if (imbWEMManager.settings.executionLog.doOpenFirstException)
        {
            externalToolExtensions.run(externalTool.notepadpp, path);
        }
    }

    if (imbWEMManager.settings.executionLog.doThrowDLCException)
    {
        throw __axe;
    }

    return output;
}
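// Usage sketch --- the typical call site is a catch block around a per-domain crawl step, mirroring the
// call in _managedOneTask above (`taskToRun` and its wRecord are assumed to be in scope):
//
//    catch (Exception ex)
//    {
//        crawlerErrorLog.CreateAndSave(ex, taskToRun.wRecord, taskToRun, crawlerErrorEnum.domainOneTaskError);
//    }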
public void statusReport()
{
    lastStatusReport = DateTime.Now;
    // aceLog.consoleControl.setAsOutput(items.tRecord);
    logger.log("----------");
    // logger.logBuilder.consoleAltColorToggle();

    double maxLatency = 0;

    // one status line per running domain-level crawl task
    foreach (Task task in task_running.ToList())
    {
        crawlerDomainTask taskInRun = task.AsyncState as crawlerDomainTask;
        double minRun = DateTime.Now.Subtract(taskInRun.startTime).TotalMinutes;

        string fR = "[d: _" + taskInRun.finishedRatio.ToString("P2") + "_ ]";
        string LbyD = "[ _" + taskInRun.targetLoaded + "/" + taskInRun.targetDetected + "_ ]";
        string TbyL = "[t: _" + minRun.ToString("#0.00") + "/" + _timeLimitForDLC.ToString() + "_ ]";

        string dom = "(initiating)";
        if (taskInRun.wRecord != null)
        {
            if (taskInRun.wRecord.state == modelRecordStateEnum.initiated)
            {
                dom = taskInRun.wRecord.domainInfo.domainName;
            }
            else
            {
                if (taskInRun.wProfile != null)
                {
                    dom = taskInRun.wProfile.domain;
                }
                else
                {
                    dom = "(initiating)";
                }
            }
        }

        maxLatency = Math.Max(maxLatency, taskInRun.sinceLastIterationStart);

        string form = "{0,40} {1,12} {2,12} {3,12} {4,10}";
        logger.log(string.Format(form, dom, fR, LbyD, TbyL, "[a:" + taskInRun.sinceLastIterationStart.ToString("#0.00") + "]"));
    }

    // aggregate ratios: waiting, done and running tasks against the complete task set
    double DRatio = (double)task_finished.Count() / (double)tasks.Count();
    double RRatio = (double)task_running.Count() / (double)tasks.Count();
    double WRatio = 1 - ((double)task_started.Count() / (double)tasks.Count());

    logger.log("--- " + items.tRecord.instance.name
        + " [w: _" + WRatio.ToString("P2") + "_ ]"
        + " [d: _" + DRatio.ToString("P2") + "_ ]"
        + " [r: _" + RRatio.ToString("P2") + "_ ]"
        + " [t: _" + DateTime.Now.Subtract(startTime).TotalMinutes.ToString("#0.00") + "_ ]");

    plugins.eventUniversal<crawlerDomainTask, spiderEvaluatorBase>(crawlJobEngineStageEnum.statusReport, this, null, null);
    reportPlugins.eventStatusReport(this, tRecord);
    // logger.logBuilder.consoleAltColorToggle();

    aceTerminalInput.doBeepViaConsole(4400, 200, 1);
}
/// <summary>
/// Optimized via .NET Tasks --- still to be implemented
/// </summary>
public void startAutoParallel(bool runManaged = false)
{
    checkSettings();

    reporter.signature = new crawlerSignature();
    reporter.signature.deployTaskMachine(this);
    // aceLog.consoleControl.setAsOutput(logger.logBuilder, items.tRecord.instance.name);

    startTime = DateTime.Now;
    items.tRecord.aRecord.tGeneralRecord.recordStart(items.tRecord.aRecord.testRunStamp, "spiderGeneralRecord::" + items.tRecord.instance.name);
    items.tRecord.aRecord.tGeneralRecord.AddSideRecord(items.tRecord.aRecord.childRecord);

    taskWaiting = items.items.Count;
    allTaskDone = false;

    Thread runThread = null;
    reportPlugins.eventAtInitiationOfCrawlJob(this, tRecord);

    // the crawl itself runs on a dedicated thread, either in auto or in managed mode
    try
    {
        if (!runManaged)
        {
            runThread = new Thread(_autoParallel);
        }
        else
        {
            runThread = new Thread(_managedParallel);
        }
    }
    catch (Exception ex)
    {
        var clog = crawlerErrorLog.CreateAndSave(ex, items.tRecord, null, crawlerErrorEnum.TaskMachineRunThreadError);
    }
    runThread.Start();

    // monitoring loop: performance sampling, status reporting, time limits and memory cleanup
    try
    {
        do
        {
            cpuTaker.checkTake();
            dataLoadTaker.checkTake();
            measureTaker.checkTake();

            if (DateTime.Now.Subtract(lastStatusReport).TotalMinutes > TimeForObligatoryReport)
            {
                plugins.eventUniversal<crawlerDomainTask, spiderEvalRuleBase>(crawlJobEngineStageEnum.performanceTakeCycle, this, null, null);
            }

            Thread.Sleep(imbWEMManager.settings.crawlerJobEngine.crawlerDomainCheckTickMs);

            foreach (crawlerDomainTask taskInRun in items.running.ToList())
            {
                switch (taskInRun.status)
                {
                    case crawlerDomainTaskStatusEnum.aborted:
                    case crawlerDomainTaskStatusEnum.done:
                        crawlerDomainTask rem = taskInRun;
                        items.running.TryTake(out rem);
                        items.done.AddUnique(taskInRun);
                        break;
                    default:
                        //if (DateTime.Now.Subtract(taskInRun.startTime).TotalMinutes > TimeLimitForDomainCrawl)
                        //{
                        //    taskInRun.isStageAborted = true;
                        //    taskInRun.status = crawlerDomainTaskStatusEnum.aborted;
                        //}
                        break;
                }
            }

            if (imbWEMManager.MASTERKILL_SWITCH)
            {
                cpuTaker.take();
                dataLoadTaker.take();
                measureTaker.take();
                Cancel();
            }

            if (DateTime.Now.Subtract(lastStatusReport).TotalMinutes > TimeForObligatoryReport)
            {
                statusReport();
            }

            if (DateTime.Now.Subtract(startTime).TotalMinutes > TimeLimitForCompleteJob)
            {
                aceLog.log("Job time limit triggered - cancelling all running tasks");
                Cancel();
            }

            if ((items.done.Count() - lastLoad) > LoadForMemoryFlush)
            {
                lastLoad = items.done.Count();
                imbWEMManager.GCCall("Regular memory cleanup after " + lastLoad + " domains crawled");
            }

            //if ((items.done.Count > 1) && (!items.running.Any()))
            //{
            //    allTaskDone = true;
            //}
        } while (!allTaskDone);
    }
    catch (Exception ex)
    {
        var clog = crawlerErrorLog.CreateAndSave(ex, items.tRecord, null, crawlerErrorEnum.TaskMachineMonitoringError);
    }

    aceLog.log("Terminating parent run thread");
    runThread.Join();
    aceLog.log("Parent thread terminated");

    if (!imbWEMManager.MASTERKILL_SWITCH)
    {
        cpuTaker.take();
        dataLoadTaker.take();
        measureTaker.take();
    }

    items.tRecord.aRecord.tGeneralRecord.recordFinish();

    try
    {
        items.tRecord.instance.plugins.eventCrawlJobFinished(items.tRecord.aJob, this, items.tRecord);
        imbWEMManager.index.plugins.eventCrawlJobFinished(items.tRecord.aJob, this, items.tRecord);
        plugins.eventCrawlJobFinished(items.tRecord.aJob, this, items.tRecord);
        reportPlugins.eventCrawlJobFinished(items.tRecord.aJob, this, tRecord);
    }
    catch (Exception ex)
    {
    }

    aceTerminalInput.doBeepViaConsole(1200, 150, 2);
    // aceLog.log("[" + i + " / " + slimit + "] Spider[" + si + "][" + items.tRecord.instance.name + "] [" + percent.ToString("P") + "]");

    aceLog.consoleControl.removeFromOutput(logger.logBuilder);
}
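// Usage sketch (illustrative only; `machine` is assumed to be a fully configured crawlerDomainTaskMachine
// whose items collection was built as shown in the crawlerDomainTaskCollection constructor above):
//
//    machine.startAutoParallel(runManaged: true);    // managed mode: explicit Task scheduling via _managedParallel
//    // or: machine.startAutoParallel();             // auto mode: Parallel.ForEach via _autoParallel
//
// The call blocks on the monitoring loop until allTaskDone is set and the run thread is joined.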
private void _managedParallel()
{
    cancelTokens = new Dictionary<crawlerDomainTask, CancellationTokenSource>();

    try
    {
        // wrap each crawlerDomainTask into a .NET Task with its own cancellation token
        foreach (var item in items.items)
        {
            cancelTokens.Add(item, new CancellationTokenSource());
            Task task = new Task(_managedOneTask, item, cancelTokens[item].Token);
            tasks.Add(task);
            task_waiting.Add(task);
        }

        maxThreads = Math.Min(maxThreads, tasks.Count);
        int rIndex = 0;

        while (task_finished.Count < tasks.Count)
        {
            // start waiting tasks up to the maxThreads limit
            if (task_running.Count < maxThreads)
            {
                int toRun = maxThreads - task_running.Count;
                for (int i = 0; i < toRun; i++)
                {
                    Task task = task_waiting.FirstOrDefault();
                    if (task != null)
                    {
                        task_waiting.Remove(task);
                        task_running.Add(task);
                        task_started.Add(task);
                        task.Start();
                    }
                }
                rIndex = task_started.Count;
            }

            // move completed, faulted or cancelled tasks out of the running set
            foreach (Task task in task_running.ToList())
            {
                switch (task.Status)
                {
                    case TaskStatus.Faulted:
                    case TaskStatus.Canceled:
                        task_canceled.Add(task);
                        task_finished.Add(task);
                        task_running.Remove(task);
                        break;
                    case TaskStatus.RanToCompletion:
                        task_finished.Add(task);
                        task_running.Remove(task);
                        break;
                    default:
                        break;
                }
            }

            Thread.Sleep(imbWEMManager.settings.crawlerJobEngine.crawlerDomainCheckTickMs);

            // enforce per-domain time limits, engine-wide abort and inactivity detection on running tasks
            foreach (Task task in task_running.ToList())
            {
                bool abortTask = false;
                crawlerDomainTask taskToRun = (crawlerDomainTask)task.AsyncState;

                if (taskToRun.startTime != DateTime.MinValue)
                {
                    if (DateTime.Now.Subtract(taskToRun.startTime).TotalMinutes > TimeLimitForDomainCrawlCancelation)
                    {
                        abortTask = true;
                        aceLog.log("Forced cancelation of [" + taskToRun.wProfile.domain + "] due to double execution timeout [" + DateTime.Now.Subtract(taskToRun.startTime).TotalMinutes.ToString("#0.00") + "]", null, true);
                    }
                    else if (DateTime.Now.Subtract(taskToRun.startTime).TotalMinutes > _timeLimitForDLC)
                    {
                        abortTask = true;
                        taskToRun.isStageAborted = true;
                    }
                }

                if (!isEnabled)
                {
                    aceLog.log("General Crawl Engine Abort Call --> " + taskToRun.wRecord.domain);
                    abortTask = true;
                }

                if (abortTask)
                {
                    taskToRun.CallAbort(cancelTokens[taskToRun]);
                    // taskToRun.reporter.reportDomainFinished(taskToRun.wRecord);
                    task_finished.Add(task);
                    task_canceled.Add(task);
                    task_running.Remove(task);
                }

                if (!taskToRun.IsActive())
                {
                    taskToRun.CallAbort(cancelTokens[taskToRun]);
                    aceLog.log("Task [" + taskToRun.wProfile.domain + "] became inactive on state [" + taskToRun.iterationStatus.ToString() + "] -- calling for abortion", null, true);
                    task_finished.Add(task);
                    task_running.Remove(task);
                    task_canceled.Add(task);
                }
            }

            if (DateTime.Now.Subtract(startTime).TotalMinutes > TimeLimitForCompleteJob)
            {
                aceLog.log("Canceling any further thread creation - time limit reached");
                break;
            }
        }
    }
    catch (Exception ex)
    {
        // plugins.eventUniversal(crawlJobEngineStageEnum.error, this);
        var clog = crawlerErrorLog.CreateAndSave(ex, items.tRecord, null, crawlerErrorEnum.TaskMachineError);
    }

    if (task_finished.Count == tasks.Count)
    {
        allTaskDone = true;
    }
    else if (items.done.Count == items.items.Count)
    {
        allTaskDone = true;
    }
    else if (items.running.Count == 0)
    {
        allTaskDone = true;
    }
}