protected void deploy(Exception __axe, crawlerErrorEnum __errorType, string __specificMessage, object __relevantInstance, IModelRecord __relevantRecord, crawlerDomainTask __task = null, int stacks = 1)
        {
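            // Normalize the error context: keep the caller's message and record, fall back to the
            // relevant instance when it is an IModelRecord, and default the error type if none was set.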
            specificMessage = __specificMessage;
            relevantRecord  = __relevantRecord;
            if (relevantRecord == null)
            {
                if (__relevantInstance is IModelRecord)
                {
                    relevantRecord = (IModelRecord)__relevantInstance;
                }
            }

            if (__errorType == crawlerErrorEnum.none)
            {
                __errorType = crawlerErrorEnum.exceptionError;
            }

            if (__axe != null)
            {
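                // Pass an existing aceGeneralException through, or wrap a plain exception so the log
                // entry carries the crawler error type; messages mentioning "thread" flag a cancel error.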
                if (__axe is aceGeneralException)
                {
                    axe = (aceGeneralException)__axe;
                    axe.SetLogSerializable(this);
                }
                else if (__axe is Exception)
                {
                    if (__axe.Message.Contains("thread"))
                    {
                        isThreadCancelError = true;
                    }
                    axe = new aceGeneralException(__axe.Message, __axe, __relevantInstance, "Crawler error: " + __errorType.ToString(), stacks + 2);
                    axe.SetLogSerializable(this);
                }
                else
                {
                    axe = null;
                }
            }


            if (__relevantRecord != null)
            {
                relevantRecord = __relevantRecord;
                // relevantRecord.SetLogSerializable(this);
            }

            if (__task != null)
            {
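                // A task reference also marks this entry as a domain task error.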
                task = __task;
                __task.SetLogSerializable(this);
                __errorType |= crawlerErrorEnum.domainTaskError;
            }
        }
        public crawlerErrorLog(Exception __axe, IModelRecord __relevantRecord = null, crawlerDomainTask __task = null, crawlerErrorEnum __errorType = crawlerErrorEnum.none, int stacks = 1)
        {
            deploy(__axe, __errorType, "", __task, __relevantRecord, __task, stacks + 1);
        }
Example #3
        internal crawlerErrorLog CreateAndSaveError(Exception ex, modelSpiderSiteRecord wRecord, crawlerDomainTask crawlerDomainTask, crawlerErrorEnum errorType)
        {
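            // Create and persist the error log, then keep a per-domain XML copy in the logs folder.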
            crawlerErrorLog clog = crawlerErrorLog.CreateAndSave(ex, wRecord, crawlerDomainTask, errorType);

            clog.SaveXML(folder[DRFolderEnum.logs].pathFor("DLC_crash_" + wRecord.domainInfo.domainRootName.getFilename()));
            return(clog);
        }
        public static crawlerErrorLog CreateAndSave(Exception __axe, IModelRecord __relevantRecord = null, crawlerDomainTask __task = null, crawlerErrorEnum __errorType = crawlerErrorEnum.none)
        {
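            // The very first error is remembered so it can optionally be opened in an external editor once saved.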
            crawlerErrorLog output = new crawlerErrorLog(__axe, __relevantRecord, __task, __errorType, 1);

            if (FirstError == null)
            {
                FirstError = output;
            }

            Console.WriteLine("Error [" + output.Title + "]");
            // Console.WriteLine("Message [" + output.Message + "]");
            // Console.WriteLine("Spec.msg. [" + output.specificMessage + "]");

            string path = output.SaveXML();

            Console.WriteLine("Crawler error log saved to: " + path);

            if (FirstError == output)
            {
                if (imbWEMManager.settings.executionLog.doOpenFirstException)
                {
                    externalToolExtensions.run(externalTool.notepadpp, path);
                }
            }

            if (imbWEMManager.settings.executionLog.doThrowDLCException)
            {
                throw __axe;
            }

            return(output);
        }
        /// <summary>
        /// Runs the domain-level crawl task: loads the seed page, iterates the crawl loop until stage rules or limits abort it, then evaluates the result pages.
        /// </summary>
        public void start()
        {
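            // Mark the task as working, remember the executing thread and start time, and route
            // console output to this domain record.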
            iterationStatus = crawlerDomainTaskIterationPhase.iterationProcessNotStarted;
            status          = crawlerDomainTaskStatusEnum.working;

            executionThread = Thread.CurrentThread;


            if (status == crawlerDomainTaskStatusEnum.aborted)
            {
                aceLog.log("Aborted DomainTask --> start()");
                return;
            }



            lastIterationStart = DateTime.Now;
            startTime          = DateTime.Now;


            aceLog.consoleControl.setAsOutput(wRecord, "" + wProfile.domain);



            parent.parent.reportPlugins.eventUniversal(crawlReportingStageEnum.DLCPreinitiation, reporter, this, wRecord);



            try
            {
                iterationStatus = crawlerDomainTaskIterationPhase.loadingSeedPage;
                // <--- STAGE 1: load the seed (landing) page

                spiderWebLoader loader = new spiderWebLoader(parent.parent.dataLoadTaker);
                loader.controler = parent.parent.webLoaderControler;
                stageControl.prepare();


                spiderTask sTask = evaluator.getSpiderSingleTask(web.seedLink, wRecord, 1); // <--------

                spiderTaskResult sResult = loader.runSpiderTask(sTask, wRecord);            // <--------------------------------------------------------[ executes the spider task

                if (sResult.calculateSuccessRate() == 0)
                {
                    wRecord.log("Domain [" + wRecord.domain + "] is considered as failed since landing page load failed");
                    parent.parent.webLoaderControler.SetFailedDomain(wProfile, wRecord);
                }


                spiderObjectiveSolutionSet solSet = null;


                stageControl.stage.EnterStage(wRecord, evaluator);



                parent.parent.plugins.eventDLCInitiated(parent.parent, this, wRecord); //.eventDLCFinished(parent.parent, this, wRecord);
                evaluator.plugins.eventDLCInitiated(evaluator as spiderEvaluatorBase, this, wRecord);
                imbWEMManager.index.plugins.eventDLCInitiated(imbWEMManager.index.experimentEntry, this, wRecord);
                parent.parent.reportPlugins.eventDLCInitiated(reporter, this, wRecord);


                int lastTermCount = 0;

                // <--- STAGE 2: iterative crawl loop, repeated until stage control or limits stop it
                do
                {
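                    // One crawl iteration: process the loader result, apply link rules, build the next
                    // load task, run the loader, then update the iteration data unit and check the stop rules.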
                    iterationStatus = crawlerDomainTaskIterationPhase.iterationStart;

                    lastIterationStart = DateTime.Now;

                    dataUnitSpiderIteration iDataUnit = wRecord.timeseries.CreateEntry(null, sTask.iteration);


                    iterationStatus = crawlerDomainTaskIterationPhase.receiveResult;

                    if (imbWEMManager.MASTERKILL_SWITCH)
                    {
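                        // Global kill switch: drop pending results and tasks, force the limits below the
                        // current iteration and close the task immediately.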
                        aceLog.log("MASTERKILL SWITCH ON :: crawlerDomainTask->" + iterationStatus.ToString());
                        isStageAborted = true;
                        sResult.items.Clear();
                        sResult.task.Clear();
                        evaluator.settings.limitIterations    = wRecord.iteration - 5;
                        evaluator.settings.limitTotalPageLoad = 0;


                        Closing();
                        return;
                    }


                    if (isStageAborted)
                    {
                        Closing();
                        return;
                    }



                    evaluator.plugins.processLoaderResult(sResult, wRecord, this);

                    // wRecord.context.targets.termsAll.Count();

                    var iter = wRecord.iterationTableRecord.GetLastEntryTouched();
                    if (iter != null)
                    {
                        lastTermCount = iter.terms_all;
                    }

                    evaluator.operation_receiveResult(sResult, wRecord);

                    // __tc = wRecord.context.targets.termsAll.Count() - __tc;



                    if (isStageAborted)
                    {
                        Closing();
                        return;
                    }

                    iterationStatus = crawlerDomainTaskIterationPhase.applyLinkRules;
                    evaluator.plugins.processAfterResultReceived(wRecord, this);
                    solSet = evaluator.operation_applyLinkRules(wRecord);

                    if (isStageAborted)
                    {
                        Closing();
                        return;
                    }

                    iterationStatus = crawlerDomainTaskIterationPhase.getLoadTask;
                    sTask           = evaluator.operation_GetLoadTask(wRecord);

                    if (isStageAborted)
                    {
                        Closing();
                        return;
                    }

                    iterationStatus = crawlerDomainTaskIterationPhase.loadingTask;
                    if (isLoaderDisabled)
                    {
                        wRecord.log("-- Loader component is disabled for this [" + wRecord.domain + "] task.");
                        sResult = new spiderTaskResult();
                    }
                    else
                    {
                        sResult = loader.runSpiderTask(sTask, wRecord);
                    }
                    if (isStageAborted)
                    {
                        Closing();
                        return;
                    }


                    parent.parent.dataLoadTaker.AddIteration();

                    iterationStatus = crawlerDomainTaskIterationPhase.updatingData;

                    if (evaluator.settings.doEnableCrossLinkDetection)
                    {
                        evaluator.operation_detectCrossLinks(wRecord);
                    }

                    iDataUnit.checkData();

                    targetLoaded   = iDataUnit.tc_loaded_p;
                    targetDetected = iDataUnit.tc_detected_p;



                    if (reporter != null)
                    {
                        try {
                            int lTC   = 0;
                            var iter2 = wRecord.iterationTableRecord.GetLastEntryTouched();
                            if (iter2 != null)
                            {
                                lTC = iter2.terms_all - lastTermCount;
                            }

                            reporter.reportIteration(iDataUnit, wRecord, evaluator); // <------ a new iteration record is created here
                            imbWEMManager.index.plugins.eventIteration(imbWEMManager.index.experimentEntry, this, wRecord);


                            parent.parent.dataLoadTaker.AddContentPage(lTC, sResult.Count);
                        }
                        catch (Exception ex)
                        {
                            throw new aceGeneralException(ex.Message, ex, reporter, "Reporter.reportIteration() exception");
                        }
                    }

                    parent.parent.reportPlugins.eventIteration(evaluator, this, wRecord);


                    iterationStatus = crawlerDomainTaskIterationPhase.checkingRules;
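                    // Stop conditions: loaded-page limit, iteration limit, and the overall time budget for this DLC.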

                    if (targetLoaded >= evaluator.settings.limitTotalPageLoad)
                    {
                        isStageAborted = true;
                        wRecord.log("--- Loaded pages count meet limit [" + targetLoaded + "] on iteration [" + iDataUnit.iteration + "].");
                    }

                    if (iDataUnit.iteration >= evaluator.settings.limitIterations)
                    {
                        isStageAborted = true;
                        wRecord.log("--- Iteration limit reached [" + iDataUnit.iteration + "].");
                    }


                    if (DateTime.Now.Subtract(startTime).TotalMinutes >= parent.parent._timeLimitForDLC)
                    {
                        isStageAborted = true;
                        wRecord.log("--- Timeout : crawler domain task [" + wRecord.web.seedLink.url + "] aborted after [" + DateTime.Now.Subtract(startTime).TotalMinutes + "] minutes.");
                    }

                    if (isStageAborted)
                    {
                        break;
                    }
                } while ((!stageControl.stage.CheckStage(wRecord, solSet, sTask)) && !isStageAborted);
                iterationStatus = crawlerDomainTaskIterationPhase.pageEvaluation;

                // <---- STAGE 3: evaluate the collected pages into the result page set
                wRecord.resultPageSet = evaluator.operation_evaluatePages(wRecord);

                Closing();
            } catch (Exception ex)
            {
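                // The iteration phase that was running when the exception occurred selects a more specific error type for the log.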
                crawlerErrorEnum errorType = crawlerErrorEnum.domainTaskError;

                switch (iterationStatus)
                {
                case crawlerDomainTaskIterationPhase.applyLinkRules:
                    errorType = crawlerErrorEnum.spiderModuleError;
                    break;

                case crawlerDomainTaskIterationPhase.getLoadTask:
                    errorType = crawlerErrorEnum.spiderGetTaskError;
                    break;

                case crawlerDomainTaskIterationPhase.loadingTask:
                    errorType = crawlerErrorEnum.spiderLoadingError;
                    break;

                case crawlerDomainTaskIterationPhase.pageEvaluation:
                    errorType = crawlerErrorEnum.spiderModuleError;
                    break;
                }

                string domainName = wRecord.domainInfo.domainName;

                if (!tRecord.crashedDomains.Contains(domainName))
                {
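                    // First crash for this domain: remember it, re-initialize and restart the task once.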
                    wRecord.log("Domain crashed first time: " + ex.Message);
                    aceLog.log("Domain [" + domainName + "] crashed first time: " + ex.Message);
                    aceLog.log("Domain [" + domainName + "] is restarting... ");
                    status = crawlerDomainTaskStatusEnum.waiting;
                    tRecord.crashedDomains.Add(wRecord.domainInfo.domainName);
                    reInitialization();
                    start();
                }
                else
                {
                    status = crawlerDomainTaskStatusEnum.aborted;

                    wRecord.log("Aborted by execution exception: " + ex.Message);
                }

                var clog = reporter.CreateAndSaveError(ex, wRecord, this, errorType);
                wRecord.log(clog.Message);
                //  crawlerErrorLog cel = new crawlerErrorLog(ex, wRecord, this, errorType);
            } finally
            {
            }

            aceLog.consoleControl.removeFromOutput(wRecord); //, "sp:" + tRecord.instance.name);
        }