Example No. 1
        private void _managedOneTask(object domainTask)
        {
            crawlerDomainTask taskToRun = (crawlerDomainTask)domainTask;

            try
            {
                items.running.Add(taskToRun);

                taskToRun.start();
                Thread.Sleep(50);

                logger.log("[" + taskToRun.wProfile.domain + "] finished [tl:" + taskToRun.targetLoaded + "][td:" + taskToRun.targetDetected + "]");
                items.done.AddUnique(taskToRun);
            } catch (Exception ex)
            {
                items.done.AddUnique(taskToRun);

                crawlerErrorLog clog = crawlerErrorLog.CreateAndSave(ex, taskToRun.wRecord, taskToRun, crawlerErrorEnum.domainOneTaskError);

                aceLog.log("[" + taskToRun.wProfile.domain + "] Thread crashed [" + ex.Message + ":" + ex.GetType().Name);

                taskToRun.CallAbort(cancelTokens[taskToRun]);
            }
        }
Example No. 2
        /// <summary>
        /// Auto parallel execution --- the framework (Parallel.ForEach) handles the thread pool and task scheduling.
        /// </summary>
        private void _autoParallel()
        {
            Parallel.ForEach <crawlerDomainTask>(items.items, taskToRun =>
            {
                string tskName = taskToRun.wProfile.domain;
                try
                {
                    Thread.Sleep(50);
                    //changeTaskRunning();

                    aceLog.log("[" + taskToRun.wProfile.domain + "] started on [id:" + Thread.CurrentThread.ManagedThreadId + "]");

                    items.running.Add(taskToRun);
                    taskToRun.start();

                    Thread.Sleep(50);
                    logger.log("[" + taskToRun.wProfile.domain + "] finished [tl:" + taskToRun.targetLoaded + "][td:" + taskToRun.targetDetected + "]");
                    items.done.AddUnique(taskToRun);
                    //changeTaskDone();
                    //changeTaskRunning(false);
                } catch (Exception ex)
                {
                    taskToRun.isStageAborted = true;
                    aceLog.log("[" + tskName + "] Thread crashed [" + ex.Message + ":" + ex.GetType().Name);
                }
            });

            allTaskDone = true;
        }
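
Example No. 2 relies on Parallel.ForEach to manage the thread pool. Below is a minimal sketch of that pattern (standard TPL only, not the library's API); jobs and maxThreads are hypothetical stand-ins for items.items and the engine's thread limit.

        // Minimal sketch, assuming only the standard TPL.
        // Requires: using System; using System.Collections.Generic; using System.Threading.Tasks;
        private static void RunParallelSketch(IEnumerable<Action> jobs, int maxThreads)
        {
            // MaxDegreeOfParallelism caps concurrency; the framework owns thread scheduling.
            var options = new ParallelOptions { MaxDegreeOfParallelism = maxThreads };

            Parallel.ForEach(jobs, options, job =>
            {
                try
                {
                    job();   // corresponds to one taskToRun.start() call above
                }
                catch (Exception ex)
                {
                    // log and swallow per-job failures so one crash does not stop the whole crawl
                    Console.WriteLine("Job crashed: " + ex.Message + " (" + ex.GetType().Name + ")");
                }
            });
        }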
Example No. 3
        protected void deploy(Exception __axe, crawlerErrorEnum __errorType, string __specificMessage, object __relevantInstance, IModelRecord __relevantRecord, crawlerDomainTask __task = null, int stacks = 1)
        {
            specificMessage = __specificMessage;
            relevantRecord  = __relevantRecord;
            if (relevantRecord == null)
            {
                if (__relevantInstance is IModelRecord)
                {
                    relevantRecord = (IModelRecord)__relevantInstance;
                }
            }

            if (__errorType == crawlerErrorEnum.none)
            {
                __errorType = crawlerErrorEnum.exceptionError;
            }

            if (__axe != null)
            {
                if (__axe is aceGeneralException)
                {
                    axe = (aceGeneralException)__axe;
                    axe.SetLogSerializable(this);
                }
                else if (__axe is Exception)
                {
                    if (__axe.Message.Contains("thread"))
                    {
                        isThreadCancelError = true;
                    }
                    axe = new aceGeneralException(__axe.Message, __axe, __relevantInstance, "Crawler error: " + __errorType.ToString(), stacks + 2);
                    axe.SetLogSerializable(this);
                }
                else
                {
                    axe = null;
                }
            }


            if (__relevantRecord != null)
            {
                relevantRecord = __relevantRecord;
                // relevantRecord.SetLogSerializable(this);
            }

            if (__task != null)
            {
                task = __task;
                __task.SetLogSerializable(this);
                __errorType |= crawlerErrorEnum.domainTaskError;
            }
        }
        /// <summary>
        /// Here each webSiteProfile is used to create a crawlerDomainTask and its wRecord
        /// </summary>
        /// <param name="__tRecord">The spider test record.</param>
        /// <param name="__sample">The sample of web site profiles to crawl.</param>
        /// <param name="__parent">The parent crawlerDomainTaskMachine.</param>
        public crawlerDomainTaskCollection(modelSpiderTestRecord __tRecord, List <webSiteProfile> __sample, crawlerDomainTaskMachine __parent)
        {
            sampleSize = __sample.Count();
            tRecord    = __tRecord;
            parent     = __parent;

            foreach (webSiteProfile profile in __sample)
            {
                //var crawlerContext = tRecord.aRecord.crawledContextGlobalRegister.GetContext(profile.domain, tRecord.aRecord.sciProject.mainWebCrawler.mainSettings, profile, tRecord.aRecord.testRunStamp);
                var task = new crawlerDomainTask(profile, this);
                items.Enqueue(task);
            }
        }
Example No. 5
 public crawlerErrorLog(Exception __axe, IModelRecord __relevantRecord = null, crawlerDomainTask __task = null, crawlerErrorEnum __errorType = crawlerErrorEnum.none, int stacks = 1)
 {
     deploy(__axe, __errorType, "", __task, __relevantRecord, __task, stacks + 1);
 }
Example No. 6
        public static crawlerErrorLog CreateAndSave(Exception __axe, IModelRecord __relevantRecord = null, crawlerDomainTask __task = null, crawlerErrorEnum __errorType = crawlerErrorEnum.none)
        {
            crawlerErrorLog output = new crawlerErrorLog(__axe, __relevantRecord, __task, __errorType, 1);

            if (FirstError == null)
            {
                FirstError = output;
            }

            Console.WriteLine("Error [" + output.Title + "]");
            // Console.WriteLine("Message [" + output.Message + "]");
            // Console.WriteLine("Spec.msg. [" + output.specificMessage + "]");

            string path = output.SaveXML();

            Console.WriteLine("Crawler error log saved to: " + path);

            if (FirstError == output)
            {
                if (imbWEMManager.settings.executionLog.doOpenFirstException)
                {
                    externalToolExtensions.run(externalTool.notepadpp, path);
                }
            }

            if (imbWEMManager.settings.executionLog.doThrowDLCException)
            {
                throw __axe;
            }

            return output;
        }
Example No. 7
        public void statusReport()
        {
            lastStatusReport = DateTime.Now;

            // aceLog.consoleControl.setAsOutput(items.tRecord);
            logger.log("----------");
            //  logger.logBuilder.consoleAltColorToggle();

            double maxLatency = 0;

            foreach (Task task in task_running.ToList())
            {
                crawlerDomainTask taskInRun = task.AsyncState as crawlerDomainTask;
                double            minRun    = DateTime.Now.Subtract(taskInRun.startTime).TotalMinutes;


                string fR   = "[d: _" + taskInRun.finishedRatio.ToString("P2") + "_ ]";
                string LbyD = "[ _" + taskInRun.targetLoaded + "/" + taskInRun.targetDetected + "_ ]";
                string TbyL = "[t: _" + minRun.ToString("#0.00") + "/" + _timeLimitForDLC.ToString() + "_ ]";

                string dom = "(initiating)";

                if (taskInRun.wRecord != null)
                {
                    if (taskInRun.wRecord.state == modelRecordStateEnum.initiated)
                    {
                        dom = taskInRun.wRecord.domainInfo.domainName;
                    }
                    else
                    {
                        if (taskInRun.wProfile != null)
                        {
                            dom = taskInRun.wProfile.domain;
                        }
                        else
                        {
                            dom = "(initiating)";
                        }
                    }
                }

                maxLatency = Math.Max(maxLatency, taskInRun.sinceLastIterationStart);

                string form = "{0,40} {1,12} {2,12} {3,12} {4,10}";
                logger.log(string.Format(form, dom, fR, LbyD, TbyL, "[a:" + taskInRun.sinceLastIterationStart.ToString("#0.00") + "]"));
            }

            double DRatio = (double)task_finished.Count() / (double)tasks.Count();
            double RRatio = (double)task_running.Count() / (double)tasks.Count();
            double WRatio = (1 - ((double)task_started.Count() / (double)tasks.Count()));

            logger.log("--- " + items.tRecord.instance.name + " [w: _" + WRatio.ToString("P2") + "_ ] [d: _" + DRatio.ToString("P2") + "_ ]" + "] [r: _" + RRatio.ToString("P2") + "_ ]" + " [t: _" + (DateTime.Now.Subtract(startTime).TotalMinutes.ToString("#0.00")) + "_ ]");



            plugins.eventUniversal <crawlerDomainTask, spiderEvaluatorBase>(crawlJobEngineStageEnum.statusReport, this, null, null);
            reportPlugins.eventStatusReport(this, tRecord);



            // logger.logBuilder.consoleAltColorToggle();



            aceTerminalInput.doBeepViaConsole(4400, 200, 1);
        }
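
A note on the column layout in Example No. 7: the composite format string uses alignment components, where a positive width right-aligns the argument within a field of that many characters. The sketch below is an illustration only, with invented values.

        // Illustration only; the values are invented. Requires: using System;
        private static void FormatSketch()
        {
            string form = "{0,40} {1,12} {2,12} {3,12} {4,10}";
            Console.WriteLine(string.Format(form,
                "example.org",       // domain, right-aligned in a 40-character field
                "[d: _12.50%_ ]",    // finished ratio
                "[ _5/40_ ]",        // targets loaded / detected
                "[t: _1.25/10_ ]",   // minutes running / DLC time limit
                "[a:0.10]"));        // time since last iteration start
        }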
Example No. 8
        /// <summary>
        /// Optimized via .NET Tasks --- still to be implemented
        /// </summary>
        public void startAutoParallel(bool runManaged = false)
        {
            checkSettings();

            reporter.signature = new crawlerSignature();
            reporter.signature.deployTaskMachine(this);

            //  aceLog.consoleControl.setAsOutput(logger.logBuilder, items.tRecord.instance.name);
            startTime = DateTime.Now;

            items.tRecord.aRecord.tGeneralRecord.recordStart(items.tRecord.aRecord.testRunStamp, "spiderGeneralRecord::" + items.tRecord.instance.name);

            items.tRecord.aRecord.tGeneralRecord.AddSideRecord(items.tRecord.aRecord.childRecord);

            taskWaiting = items.items.Count;
            allTaskDone = false;
            Thread runThread = null;


            reportPlugins.eventAtInitiationOfCrawlJob(this, tRecord);


            try
            {
                if (!runManaged)
                {
                    runThread = new Thread(_autoParallel);
                }
                else
                {
                    runThread = new Thread(_managedParallel);
                }
            } catch (Exception ex)
            {
                var clog = crawlerErrorLog.CreateAndSave(ex, items.tRecord, null, crawlerErrorEnum.TaskMachineRunThreadError);
            }

            runThread.Start();

            try
            {
                do
                {
                    cpuTaker.checkTake();
                    dataLoadTaker.checkTake();
                    measureTaker.checkTake();

                    if (DateTime.Now.Subtract(lastStatusReport).TotalMinutes > TimeForObligatoryReport)
                    {
                        plugins.eventUniversal <crawlerDomainTask, spiderEvalRuleBase>(crawlJobEngineStageEnum.performanceTakeCycle, this, null, null);
                    }

                    Thread.Sleep(imbWEMManager.settings.crawlerJobEngine.crawlerDomainCheckTickMs);

                    foreach (crawlerDomainTask taskInRun in items.running.ToList())
                    {
                        switch (taskInRun.status)
                        {
                        case crawlerDomainTaskStatusEnum.aborted:
                        case crawlerDomainTaskStatusEnum.done:
                            crawlerDomainTask rem = taskInRun;
                            items.running.TryTake(out rem);
                            items.done.AddUnique(taskInRun);
                            break;

                        default:

                            //if (DateTime.Now.Subtract(taskInRun.startTime).TotalMinutes > TimeLimitForDomainCrawl)
                            //{
                            //    taskInRun.isStageAborted = true;
                            //    taskInRun.status = crawlerDomainTaskStatusEnum.aborted;

                            //}
                            break;
                        }
                    }

                    if (imbWEMManager.MASTERKILL_SWITCH)
                    {
                        cpuTaker.take();
                        dataLoadTaker.take();
                        measureTaker.take();

                        Cancel();
                    }

                    if (DateTime.Now.Subtract(lastStatusReport).TotalMinutes > TimeForObligatoryReport)
                    {
                        statusReport();
                    }


                    if (DateTime.Now.Subtract(startTime).TotalMinutes > TimeLimitForCompleteJob)
                    {
                        aceLog.log("Job time limit triggered - cancelling all running tasks");
                        Cancel();
                    }

                    if ((items.done.Count() - lastLoad) > LoadForMemoryFlush)
                    {
                        lastLoad = items.done.Count();
                        imbWEMManager.GCCall("Regular memory cleanup after " + lastLoad + " domains crawled");
                    }

                    //if ((items.done.Count > 1) && (!items.running.Any()))
                    //{
                    //    allTaskDone = true;
                    //}
                } while (!allTaskDone);
            } catch (Exception ex)
            {
                var clog = crawlerErrorLog.CreateAndSave(ex, items.tRecord, null, crawlerErrorEnum.TaskMachineMonitoringError);
            }

            aceLog.log("Terminating parent run thread");
            runThread.Join();
            aceLog.log("Parent thread terminated");

            if (!imbWEMManager.MASTERKILL_SWITCH)
            {
                cpuTaker.take();
                dataLoadTaker.take();
                measureTaker.take();
            }


            items.tRecord.aRecord.tGeneralRecord.recordFinish();

            try
            {
                items.tRecord.instance.plugins.eventCrawlJobFinished(items.tRecord.aJob, this, items.tRecord);
                imbWEMManager.index.plugins.eventCrawlJobFinished(items.tRecord.aJob, this, items.tRecord);
                plugins.eventCrawlJobFinished(items.tRecord.aJob, this, items.tRecord);

                reportPlugins.eventCrawlJobFinished(items.tRecord.aJob, this, tRecord);
            } catch (Exception)
            {
                // ignore plugin failures during the job-finished notifications
            }
            aceTerminalInput.doBeepViaConsole(1200, 150, 2);

            // aceLog.log("[" + i + " / " + slimit + "] Spider[" + si + "][" + items.tRecord.instance.name + "]  [" + percent.ToString("P") + "]");

            // aceLog.consoleControl.removeFromOutput(logger.logBuilder);
        }
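
The overall shape of Example No. 8 --- a worker thread runs the parallel job while the calling thread polls, reports and enforces a global time limit --- reduces to the following sketch, which uses only standard threading calls; workBody, pollMs and jobTimeLimit are hypothetical names.

        // Minimal sketch, assuming only standard threading.
        // Requires: using System; using System.Threading;
        private static void MonitorSketch(ThreadStart workBody, int pollMs, TimeSpan jobTimeLimit)
        {
            var worker = new Thread(workBody);
            DateTime start = DateTime.Now;
            worker.Start();

            while (worker.IsAlive)
            {
                Thread.Sleep(pollMs);                    // tick, like crawlerDomainCheckTickMs

                if (DateTime.Now.Subtract(start) > jobTimeLimit)
                {
                    Console.WriteLine("Job time limit reached - waiting for the worker to stop");
                    break;                               // the real engine calls Cancel() and keeps polling
                }
            }

            worker.Join();                               // same role as runThread.Join() above
        }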
Example No. 9
        private void _managedParallel()
        {
            cancelTokens = new Dictionary <crawlerDomainTask, CancellationTokenSource>();

            try
            {
                foreach (var item in items.items)
                {
                    cancelTokens.Add(item, new CancellationTokenSource());

                    Task task = new Task(_managedOneTask, item, cancelTokens[item].Token);

                    tasks.Add(task);
                    task_waiting.Add(task);
                }

                maxThreads = Math.Min(maxThreads, tasks.Count);

                while (task_finished.Count < tasks.Count)
                {
                    if (task_running.Count < maxThreads)
                    {
                        int toRun = maxThreads - task_running.Count;

                        for (int i = 0; i < toRun; i++)
                        {
                            Task task = task_waiting.FirstOrDefault();

                            if (task != null)
                            {
                                task_waiting.Remove(task);
                                task_running.Add(task);
                                task_started.Add(task);
                                task.Start();
                            }
                        }

                    }

                    foreach (Task task in task_running.ToList())
                    {
                        switch (task.Status)
                        {
                        case TaskStatus.Faulted:
                            task_canceled.Add(task);
                            task_finished.Add(task);
                            task_running.Remove(task);
                            break;

                        case TaskStatus.RanToCompletion:
                            task_finished.Add(task);
                            task_running.Remove(task);
                            break;

                        case TaskStatus.Canceled:
                            task_canceled.Add(task);
                            task_finished.Add(task);
                            task_running.Remove(task);
                            break;

                        default:
                            break;
                        }
                    }

                    Thread.Sleep(imbWEMManager.settings.crawlerJobEngine.crawlerDomainCheckTickMs);

                    foreach (Task task in task_running.ToList())
                    {
                        bool abortTask = false;

                        crawlerDomainTask taskToRun = (crawlerDomainTask)task.AsyncState;

                        if (taskToRun.startTime != DateTime.MinValue)
                        {
                            if (DateTime.Now.Subtract(taskToRun.startTime).TotalMinutes > TimeLimitForDomainCrawlCancelation)
                            {
                                abortTask = true;

                                aceLog.log("Forced cancellation of [" + taskToRun.wProfile.domain + "] due to double execution timeout [" + DateTime.Now.Subtract(taskToRun.startTime).TotalMinutes.ToString("#0.00") + "]", null, true);
                            }
                            else if (DateTime.Now.Subtract(taskToRun.startTime).TotalMinutes > _timeLimitForDLC)
                            {
                                abortTask = true;

                                taskToRun.isStageAborted = true;
                            }
                        }

                        if (!isEnabled)
                        {
                            aceLog.log("General Crawl Engine Abort Call --> " + taskToRun.wRecord.domain);

                            abortTask = true;
                        }


                        if (abortTask)
                        {
                            taskToRun.CallAbort(cancelTokens[taskToRun]);

                            // taskToRun.reporter.reportDomainFinished(taskToRun.wRecord);

                            task_finished.Add(task);
                            task_canceled.Add(task);
                            task_running.Remove(task);
                        }

                        if (!taskToRun.IsActive())
                        {
                            taskToRun.CallAbort(cancelTokens[taskToRun]);
                            aceLog.log("Task [" + taskToRun.wProfile.domain + "] became inactive on state [" + taskToRun.iterationStatus.ToString() + "] -- calling for abortion", null, true);
                            task_finished.Add(task);
                            task_running.Remove(task);
                            task_canceled.Add(task);
                        }
                    }

                    if (DateTime.Now.Subtract(startTime).TotalMinutes > TimeLimitForCompleteJob)
                    {
                        aceLog.log("Canceling any threads creation due time limit reached");
                        break;
                    }
                }
            } catch (Exception ex)
            {
                //  plugins.eventUniversal(crawlJobEngineStageEnum.error, this);

                var clog = crawlerErrorLog.CreateAndSave(ex, items.tRecord, null, crawlerErrorEnum.TaskMachineError);
            }

            if (task_finished.Count == tasks.Count)
            {
                allTaskDone = true;
            }
            else if (items.done.Count == items.items.Count)
            {
                allTaskDone = true;
            }
            else if (items.running.Count == 0)
            {
                allTaskDone = true;
            }
        }
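
For comparison, the manual pool that _managedParallel implements can be reduced to a minimal sketch using only standard TPL types: each job is wrapped in a cold Task with its own CancellationTokenSource, at most maxThreads tasks run at once, completed tasks are retired, and a global timeout cancels whatever is still pending. All names here (jobs, maxThreads, timeout) are hypothetical, not the library's API.

        // Minimal sketch, assuming only the standard TPL.
        // Requires: using System; using System.Collections.Generic;
        //           using System.Threading; using System.Threading.Tasks;
        private static void RunManagedPoolSketch(IList<Action<CancellationToken>> jobs, int maxThreads, TimeSpan timeout)
        {
            var tokenSources = new List<CancellationTokenSource>();
            var waiting = new Queue<Task>();
            var running = new List<Task>();
            int finished = 0;

            // create cold (unstarted) tasks, one per job, each with its own cancellation token
            foreach (var job in jobs)
            {
                var cts = new CancellationTokenSource();
                tokenSources.Add(cts);
                waiting.Enqueue(new Task(() => job(cts.Token), cts.Token));
            }

            DateTime start = DateTime.Now;

            while (finished < jobs.Count)
            {
                // top up the running set to the concurrency cap
                while (running.Count < maxThreads && waiting.Count > 0)
                {
                    Task next = waiting.Dequeue();
                    running.Add(next);
                    next.Start();
                }

                // retire tasks that ran to completion, faulted or were canceled
                foreach (Task t in running.ToArray())
                {
                    if (t.IsCompleted)
                    {
                        running.Remove(t);
                        finished++;
                    }
                }

                // global time limit: signal cancellation to every job and stop the loop
                if (DateTime.Now.Subtract(start) > timeout)
                {
                    foreach (var cts in tokenSources) cts.Cancel();
                    break;
                }

                Thread.Sleep(100);   // tick, like crawlerDomainCheckTickMs in the real engine
            }
        }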