예제 #1
0
 /// <summary>
 /// Forwards the iteration event to every plug-in in <c>allPlugins</c>.
 /// </summary>
 /// <param name="__parent">Event source; handed to the plug-ins as an <c>experimentSessionEntry</c> (null when it is not one).</param>
 /// <param name="__task">Domain task passed through to each plug-in.</param>
 /// <param name="__wRecord">Site record passed through to each plug-in.</param>
 public void eventIteration(object __parent, crawlerDomainTask __task, modelSpiderSiteRecord __wRecord)
 {
     // The cast does not depend on the plug-in, so it is resolved once before the loop.
     experimentSessionEntry session = __parent as experimentSessionEntry;

     foreach (indexPlugIn_base plugIn in allPlugins)
     {
         plugIn.eventIteration(session, __task, __wRecord);
     }
 }
 /// <summary>
 /// Relays the "domain finished" report call to each entry of <c>modules</c>, in enumeration order.
 /// </summary>
 /// <param name="reporter">Reporter instance forwarded unchanged to every module.</param>
 /// <param name="wRecord">Site record forwarded unchanged to every module.</param>
 public override void reportDomainFinished(directAnalyticReporter reporter, modelSpiderSiteRecord wRecord)
 {
     foreach (ISpiderModuleBase spiderModule in modules)
     {
         spiderModule.reportDomainFinished(reporter, wRecord);
     }
 }
예제 #3
0
 /// <summary>
 /// Forwards the DLC-finished event to every plug-in in <c>allPlugins</c>.
 /// </summary>
 /// <param name="__parent">Event source; handed to the plug-ins as a <c>directReporterBase</c> (null when it is not one).</param>
 /// <param name="__task">Domain task passed through to each plug-in.</param>
 /// <param name="__wRecord">Site record passed through to each plug-in.</param>
 public void eventDLCFinished(object __parent, crawlerDomainTask __task, modelSpiderSiteRecord __wRecord)
 {
     // The cast does not depend on the plug-in, so it is resolved once before the loop.
     directReporterBase reporter = __parent as directReporterBase;

     foreach (IPlugInCommonBase<crawlReportingStageEnum, directReporterBase> plugIn in allPlugins)
     {
         plugIn.eventDLCFinished(reporter, __task, __wRecord);
     }
 }
 /// <summary>
 /// Relays the per-iteration report call to each entry of <c>modules</c>, in enumeration order.
 /// </summary>
 /// <param name="reporter">Reporter instance forwarded unchanged to every module.</param>
 /// <param name="wRecord">Site record forwarded unchanged to every module.</param>
 public override void reportIteration(directAnalyticReporter reporter, modelSpiderSiteRecord wRecord)
 {
     foreach (ISpiderModuleBase spiderModule in modules)
     {
         spiderModule.reportIteration(reporter, wRecord);
     }
 }
예제 #5
0
 /// <summary>
 /// Forwards the DLC-initiated event to every plug-in in <c>allPlugins</c>.
 /// </summary>
 /// <param name="__parent">Event source; handed to the plug-ins as a <c>crawlerDomainTaskMachine</c> (null when it is not one).</param>
 /// <param name="__task">Domain task passed through to each plug-in.</param>
 /// <param name="__wRecord">Site record passed through to each plug-in.</param>
 public void eventDLCInitiated(object __parent, crawlerDomainTask __task, modelSpiderSiteRecord __wRecord)
 {
     // The cast does not depend on the plug-in, so it is resolved once before the loop.
     crawlerDomainTaskMachine machine = __parent as crawlerDomainTaskMachine;

     foreach (IPlugInCommonBase<crawlJobEngineStageEnum, crawlerDomainTaskMachine> plugIn in allPlugins)
     {
         plugIn.eventDLCInitiated(machine, __task, __wRecord);
     }
 }
예제 #6
0
 /// <summary>
 /// Forwards the DLC-finished event to every plug-in in <c>allPlugins</c>.
 /// </summary>
 /// <param name="__parent">Event source; handed to the plug-ins as a <c>spiderEvaluatorBase</c> (null when it is not one).</param>
 /// <param name="__task">Domain task passed through to each plug-in.</param>
 /// <param name="__wRecord">Site record passed through to each plug-in.</param>
 public void eventDLCFinished(object __parent, crawlerDomainTask __task, modelSpiderSiteRecord __wRecord)
 {
     // The cast does not depend on the plug-in, so it is resolved once before the loop.
     spiderEvaluatorBase evaluator = __parent as spiderEvaluatorBase;

     foreach (IPlugInCommonBase<crawlerDomainTaskIterationPhase, spiderEvaluatorBase> plugIn in allPlugins)
     {
         plugIn.eventDLCFinished(evaluator, __task, __wRecord);
     }
 }
예제 #7
0
 /// <summary>
 /// Forwards the DLC-finished event to every plug-in in <c>allPlugins</c>.
 /// </summary>
 /// <param name="__parent">Event source; handed to the plug-ins as an <c>experimentSessionEntry</c> (null when it is not one).</param>
 /// <param name="__task">Domain task passed through to each plug-in.</param>
 /// <param name="__wRecord">Site record passed through to each plug-in.</param>
 public void eventDLCFinished(object __parent, crawlerDomainTask __task, modelSpiderSiteRecord __wRecord)
 {
     // The cast does not depend on the plug-in, so it is resolved once before the loop.
     experimentSessionEntry session = __parent as experimentSessionEntry;

     foreach (IPlugInCommonBase<indexMaintenanceStageEnum, experimentSessionEntry> plugIn in allPlugins)
     {
         plugIn.eventDLCFinished(session, __task, __wRecord);
     }
 }
        /// <summary>
        /// Registers a domain as failed: adds it to <c>failedSample</c> and appends it to <c>domainFailList</c>,
        /// unless <c>failedSample</c> already has an entry for that domain.
        /// </summary>
        /// <param name="wProfile">Web site profile; stored as the value of the new <c>failedSample</c> entry (may be null when <paramref name="wRecord"/> is given).</param>
        /// <param name="wRecord">Optional site record; when present, its <c>domain</c> takes precedence over the profile's.</param>
        /// <exception cref="aceGeneralException">Both <paramref name="wProfile"/> and <paramref name="wRecord"/> are null.</exception>
        public void SetFailedDomain(webSiteProfile wProfile, modelSpiderSiteRecord wRecord = null)
        {
            if ((wRecord == null) && (wProfile == null))
            {
                throw new aceGeneralException("Supplied wProfile and wRecord are null - this task should never run on the first place", null, this, "SetFailedDomain(webSiteProfile null)");
            }

            // Resolve the domain name: the profile is consulted first, the record (if any) overrides it.
            string failedDomain = "[unknown]";
            if (wProfile != null)
            {
                failedDomain = wProfile.domain;
            }
            if (wRecord != null)
            {
                failedDomain = wRecord.domain;
            }

            // Each domain is registered only once.
            if (failedSample.ContainsKey(failedDomain))
            {
                return;
            }

            failedSample.Add(failedDomain, wProfile);
            domainFailList.Append(failedDomain, true);
        }
예제 #9
0
        /// <summary>
        /// Builds a <c>link</c> from the URL of an index page and feeds it to the record's context.
        /// </summary>
        /// <param name="wRecord">Site record whose <c>context</c> processes the link.</param>
        /// <param name="page">Index page supplying the URL.</param>
        public void SetTarget(modelSpiderSiteRecord wRecord, indexPage page)
        {
            link targetLink = new link(page.url);

            // The value of the first entry in the record's page collection is passed as the second argument;
            // with an empty collection FirstOrDefault() yields a default entry.
            wRecord.context.processLink(
                targetLink,
                wRecord.web.webPages.items.FirstOrDefault().Value,
                false);
        }
예제 #10
0
        /// <summary>
        /// Evaluation procedure for modules without layers: every active input element is handed,
        /// as a <c>spiderLink</c>, to <c>rankLinks</c>, and the ranked result becomes the active output.
        /// </summary>
        /// <param name="input">Module input; it is cast to <c>spiderModuleData&lt;spiderLink&gt;</c>.</param>
        /// <param name="wRecord">Site record; provides the iteration for ranking and is forwarded to the report calls.</param>
        /// <returns>A new <c>spiderModuleData&lt;spiderLink&gt;</c> whose active set holds the result of <c>rankLinks</c>.</returns>
        public override ISpiderModuleData evaluate(ISpiderModuleData input, modelSpiderSiteRecord wRecord)
        {
            List<spiderLink> passedLinks = new List<spiderLink>();
            spiderModuleData<spiderLink> result = new spiderModuleData<spiderLink>();

            // One hard cast up front; the report call below reuses the typed reference.
            spiderModuleData<spiderLink> typedInput = (spiderModuleData<spiderLink>)input;

            moduleDLCRecord moduleTable = typedInput.moduleDLC; // read as in the original flow; not used further in this method
            moduleIterationRecord iterationEntry = typedInput.moduleDLCRecordTableEntry;

            // module level report: evaluation start
            if (imbWEMManager.settings.directReportEngine.DR_ReportModules)
            {
                iterationEntry.reportEvaluateStart(typedInput, wRecord, this);
            }

            // This is where the layer modules are emulated: all active elements pass through.
            input.active.ForEach(element => passedLinks.Add(element as spiderLink));

            // module level report: evaluation end
            if (imbWEMManager.settings.directReportEngine.DR_ReportModules)
            {
                iterationEntry.reportEvaluateEnd(passedLinks, wRecord, this);
            }

            result.active.AddRange(rankLinks(passedLinks, wRecord.iteration));

            // module level report: state after ranking
            if (imbWEMManager.settings.directReportEngine.DR_ReportModules)
            {
                iterationEntry.reportEvaluateAlterRanking(result.active, wRecord, this);
            }

            return result;
        }
예제 #11
0
        /// <summary>
        /// Creates the load task for the next iteration from the supplied active links.
        /// </summary>
        /// <param name="wRecord">Site record; supplies the context, the current iteration and the web model.</param>
        /// <param name="activeLinks">Candidate links; enumerated more than once by this method.</param>
        /// <returns>A <c>spiderTask</c> for iteration + 1 holding the leading links, as many as the context allows.</returns>
        protected spiderTask __operation_GetLoadTaskCommon(modelSpiderSiteRecord wRecord, IEnumerable<spiderLink> activeLinks)
        {
            operation_doControlAndStats(wRecord);

            // Number of links the context allows for the next load.
            int loadSize = wRecord.context.GetNextIterationLTSize(activeLinks);

            spiderTask nextTask = new spiderTask(wRecord.iteration + 1, wRecord.web);
            nextTask.AddRange(activeLinks.Take(loadSize));

            // Every link that did not make it into the task gets a cycle registration for this iteration.
            foreach (spiderLink candidate in activeLinks)
            {
                if (nextTask.Contains(candidate))
                {
                    continue;
                }

                candidate.marks.cycleRegistration(wRecord.iteration);
            }

            return nextTask;
        }
예제 #12
0
        /// <summary>
        /// Creates an error log through <c>crawlerErrorLog.CreateAndSave</c> and additionally saves it as XML
        /// in the logs folder, under a "DLC_crash_" name derived from the domain root name.
        /// </summary>
        /// <param name="ex">Exception being logged.</param>
        /// <param name="wRecord">Site record; its domain root name forms part of the file name.</param>
        /// <param name="crawlerDomainTask">Domain task forwarded to the log factory.</param>
        /// <param name="errorType">Error classification forwarded to the log factory.</param>
        /// <returns>The created error log.</returns>
        internal crawlerErrorLog CreateAndSaveError(Exception ex, modelSpiderSiteRecord wRecord, crawlerDomainTask crawlerDomainTask, crawlerErrorEnum errorType)
        {
            crawlerErrorLog errorLog = crawlerErrorLog.CreateAndSave(ex, wRecord, crawlerDomainTask, errorType);

            errorLog.SaveXML(
                folder[DRFolderEnum.logs].pathFor(
                    "DLC_crash_" + wRecord.domainInfo.domainRootName.getFilename()));

            return errorLog;
        }
예제 #13
0
        /// <summary>
        /// Counts pages whose cross-link count exceeds half of the page count and reports the objective
        /// as reached when that number of pages is above <c>treshold</c>.
        /// </summary>
        /// <param name="sRecord">Not read by this implementation; the <c>wRecord</c> member is used instead.</param>
        /// <param name="resources">Not read by this implementation.</param>
        /// <returns>A solution built from <c>objective</c> and <c>afirmative</c>, or null when the condition is not met.</returns>
        public override spiderObjectiveSolution evaluate(modelSpiderSiteRecord sRecord, params object[] resources)
        {
            int pagesAboveHalf = 0;

            // Too few pages loaded to judge.
            if (wRecord.web.webPages.Count() < treshold)
            {
                return null;
            }

            int halfOfPages = wRecord.web.webPages.Count() / 2;

            foreach (spiderPage page in wRecord.web.webPages.items.Values)
            {
                if (page.relationship.crossLinks.Count > halfOfPages)
                {
                    pagesAboveHalf++;
                }
            }

            if (pagesAboveHalf > treshold)
            {
                return new spiderObjectiveSolution(objective, afirmative);
            }

            return null;
        }
예제 #14
0
        /// <summary>
        /// Marks a link as aborted when its score is at or below the first quartile of <c>scoreList</c>,
        /// provided at least two scores exist and the active link count exceeds <c>treshold</c>.
        /// </summary>
        /// <param name="element">Link under evaluation.</param>
        /// <param name="sRecord">Not read by this implementation; the <c>wRecord</c> member is used instead.</param>
        /// <param name="resources">Not read by this implementation.</param>
        /// <returns>An aborted solution for <paramref name="element"/>, otherwise a default-constructed solution.</returns>
        public override spiderObjectiveSolution evaluate(spiderLink element, modelSpiderSiteRecord sRecord, params object[] resources)
        {
            spiderObjectiveSolution neutral = new spiderObjectiveSolution();

            if (scoreList.Count < 2)
            {
                return neutral;
            }

            if (wRecord.web.webActiveLinks.Count > treshold)
            {
                // int.MinValue in q1 marks "quartiles not computed yet"; they are computed on first use.
                if (q1 == int.MinValue)
                {
                    double lowerQuartile;
                    double upperQuartile;
                    Measures.Quartiles(scoreList.ToArray(), out lowerQuartile, out upperQuartile, true);
                    q1 = Convert.ToInt32(lowerQuartile);
                    q3 = Convert.ToInt32(upperQuartile);
                }

                if (element.marks.score <= q1)
                {
                    return new spiderObjectiveSolution(element, spiderObjectiveStatus.aborted);
                }
            }

            return neutral;
        }
예제 #15
0
        /// <summary>
        /// Records the state of a module's input at the start of an evaluation: start time, iteration,
        /// input URLs, cycler/recycler ratios, age ratio and the URL assertion obtained from the page index table.
        /// </summary>
        /// <param name="input">Module input whose active links are inspected.</param>
        /// <param name="wRecord">Site record; supplies the iteration number.</param>
        /// <param name="moduleInstance">Evaluating module; when it is a <c>spiderLayerModuleBase</c>, its layer count is added to <c>targets</c>.</param>
        public void reportEvaluateStart(spiderModuleData <spiderLink> input, modelSpiderSiteRecord wRecord, spiderModuleBase moduleInstance)
        {
            start     = DateTime.Now;
            iteration = wRecord.iteration;

            // Local accumulators filled while walking the active input links.
            int cyclers_c     = 0;
            int recyclers_c   = 0;
            int cyclers_age_c = 0; // accumulated below but not read again in this method
            int input_age     = 0;

            foreach (spiderLink link in input.active)
            {
                inputTargets_collection.Add(link.url);

                if (link.marks.cycleCount > 0)
                {
                    // Last cycled in the immediately preceding iteration -> counted as cycler.
                    if (link.marks.cycleLastIteration == (iteration - 1))
                    {
                        cyclers_c++;
                        cyclers_age_c += iteration - link.iterationDiscovery;
                    }
                    // Last cycled earlier than the preceding iteration -> counted as recycler.
                    else if (link.marks.cycleLastIteration < (iteration - 1))
                    {
                        recyclers_c++;
                    }
                }

                // Iterations elapsed since the link was discovered.
                input_age += iteration - link.iterationDiscovery;
            }

            inputTargets = input.active.Count();
            processed    = inputTargets; // <-- the difference is only in the aggregation


            // NOTE(review): GetRatio is presumably input_age / inputTargets -- confirm against its definition.
            age = input_age.GetRatio(inputTargets);


            inputTargets_assertion   = imbWEMManager.index.pageIndexTable.GetUrlAssertion(inputTargets_collection);
            inputPotentialPrecission = inputTargets_assertion.relevant;
            evaluationCertainty      = inputTargets_assertion.certainty;
            // The estimation is run before IPnominal is read below.
            inputTargets_assertion.performInfoGainEstimation();

            PotInputIP = inputTargets_assertion.IPnominal;


            targets = inputTargets;

            // Null when the module is not a layer module.
            layerModule = moduleInstance as spiderLayerModuleBase;

            cyclers   = cyclers_c.GetRatio(inputTargets);
            recyclers = recyclers_c.GetRatio(inputTargets);

            if (layerModule != null)
            {
                // Layer modules also count what is held in their layers.
                accumulation = layerModule.layers.CountAll;

                targets += accumulation;
            }
        }
예제 #16
0
 /// <summary>
 /// Gets the <c>textByIteration</c> entry kept under the domain root name of the given site record.
 /// </summary>
 /// <param name="wRecord">Site record whose <c>domainInfo.domainRootName</c> is the lookup key.</param>
 /// <returns>The result of <c>items.GetOrAdd</c> for that key; a new instance is supplied as the value to add.</returns>
 public textByIteration this[modelSpiderSiteRecord wRecord]
 {
     get
     {
         string rootName = wRecord.domainInfo.domainRootName;
         textByIteration candidate = new textByIteration(rootName);

         return items.GetOrAdd(rootName, candidate);
     }
 }
예제 #17
0
        /// <summary>
        /// Feeds the cross-link count of a page into the running statistics:
        /// updates <c>min</c> and <c>max</c> and appends the count to <c>scoreList</c>.
        /// </summary>
        /// <param name="element">Page whose <c>relationship.crossLinks</c> are counted.</param>
        /// <param name="sRecord">Not read by this implementation.</param>
        /// <param name="resources">Not read by this implementation.</param>
        public override void learn(spiderPage element, modelSpiderSiteRecord sRecord, params object[] resources)
        {
            int crossLinkCount = element.relationship.crossLinks.Count();

            min = Math.Min(crossLinkCount, min);
            max = Math.Max(crossLinkCount, max);

            double score = Convert.ToDouble(crossLinkCount);
            scoreList.Add(score);
        }
예제 #18
0
        /// <summary>
        /// Decides whether the stage loop should be left. The checks run in this order: empty task,
        /// objective solution set, spider iteration limit, spider page-load limit, stage iteration limit,
        /// global stage iteration limit. When none applies, <c>stageIteration</c> is incremented.
        /// </summary>
        /// <param name="wRecord">Site record; supplies the loggers, the loaded pages and the spider settings.</param>
        /// <param name="oSet">Objective solution set handed to <c>operation_executeObjectiveSolutionSet</c>.</param>
        /// <param name="task">Current spider task.</param>
        /// <returns>True when the stage loop should be left, false when it should continue.</returns>
        /// <exception cref="aceGeneralException"><c>stageIteration</c> exceeded <c>GLOBAL_stageIterationLimit</c>.</exception>
        public bool CheckStage(modelSpiderSiteRecord wRecord, spiderObjectiveSolutionSet oSet, spiderTask task)
        {
            // Nothing to load.
            if (task.Count() == 0)
            {
                wRecord.logBuilder.log("> Spider task [i:" + task.iteration + "] have no tasks defined. Aborting the stage loop.");
                return true;
            }

            // Objective solution set.
            if (operation_executeObjectiveSolutionSet(oSet, wRecord))
            {
                return true;
            }

            // Spider limit override: iterations.
            if (stageIteration > wRecord.tRecord.instance.settings.limitIterations)
            {
                wRecord.log("> Spider settings (limit iterations) trigered abort at [" + stageIteration + "] Aborting the stage loop.");
                return true;
            }

            // Spider limit override: total pages loaded.
            if (wRecord.web.webPages.Count() > wRecord.tRecord.instance.settings.limitTotalPageLoad)
            {
                wRecord.log("> Spider settings (limit pages load) trigered abort at [" + wRecord.web.webPages.Count() + "] Aborting the stage loop.");
                return true;
            }

            // Stage-level iteration limit.
            if (stageIteration > stageIterationLimit)
            {
                wRecord.logBuilder.log("> Stage [" + name + "] iteration limit reached [ " + stageIterationLimit + " ] -- aborting [" + objectives.Count + "] objectives and move on");
                return true;
            }

            // Global iteration limit: treated as an error rather than a normal exit.
            if (stageIteration > GLOBAL_stageIterationLimit)
            {
                throw new aceGeneralException("spiderStage [" + name + "] reached the " + nameof(GLOBAL_stageIterationLimit) + "(" + GLOBAL_stageIterationLimit.ToString() + ")");
            }

            stageIteration++;

            return false;
        }
예제 #19
0
        /// <summary>
        /// Populates the relationship information of every page: first distributes links into each page's
        /// outflow/inflow collections, then collects cross links and records one count per page in
        /// <c>sRecord.crossLinkStats</c>.
        /// </summary>
        /// <param name="sRecord">Site record whose <c>web.webLinks</c> and <c>web.webPages</c> are processed.</param>
        public void operation_detectCrossLinks(modelSpiderSiteRecord sRecord)
        {
            // Pass 1: relate every link to every page by looking for the page key inside the link key.
            foreach (KeyValuePair<string, spiderLink> linkEntry in sRecord.web.webLinks.items)
            {
                foreach (KeyValuePair<string, spiderPage> pageEntry in sRecord.web.webPages.items)
                {
                    int keyPosition = linkEntry.Key.IndexOf(pageEntry.Key);

                    // Page key does not occur in the link key: no relation.
                    if (keyPosition == -1)
                    {
                        continue;
                    }

                    // Page key found at an index below 5 -> outflow link of the page; further in -> inflow link.
                    if (keyPosition < 5)
                    {
                        pageEntry.Value.relationship.outflowLinks.Add(linkEntry.Key, linkEntry.Value);
                    }
                    else
                    {
                        pageEntry.Value.relationship.inflowLinks.Add(linkEntry.Key, linkEntry.Value);
                    }
                }
            }

            sRecord.crossLinkStats.StartNew();

            // Pass 2: an inflow link whose signature from getLinkSignature(false, true) is also a known link key
            // is registered as a cross link of the page.
            foreach (KeyValuePair<string, spiderPage> pageEntry in sRecord.web.webPages.items)
            {
                int pageCrossLinks = 0;

                foreach (KeyValuePair<string, spiderLink> inflowEntry in pageEntry.Value.relationship.inflowLinks)
                {
                    string inverseSignature = inflowEntry.Value.getLinkSignature(false, true);

                    if (!sRecord.web.webLinks.items.ContainsKey(inverseSignature))
                    {
                        continue;
                    }

                    pageEntry.Value.relationship.crossLinks.Add(inverseSignature, sRecord.web.webLinks.items[inverseSignature]);
                    pageCrossLinks++;
                }

                sRecord.crossLinkStats.Current().Add(pageCrossLinks);
            }
        }
예제 #20
0
 /// <summary>
 /// Reports the objective as reached once the number of loaded pages exceeds <c>treshold</c>.
 /// </summary>
 /// <param name="sRecord">Not read by this implementation; the <c>wRecord</c> member is used instead.</param>
 /// <param name="resources">Not read by this implementation.</param>
 /// <returns>A solution built from <c>objective</c> and <c>afirmative</c>, or null while the page count is not above the threshold.</returns>
 public override spiderObjectiveSolution evaluate(modelSpiderSiteRecord sRecord, params object[] resources)
 {
     bool limitExceeded = wRecord.web.webPages.Count() > treshold;
     if (!limitExceeded)
     {
         return null;
     }

     return new spiderObjectiveSolution(objective, afirmative);
 }
예제 #21
0
        /// <summary>
        /// Signals the start of an iteration to the active layer rules, the passive layer rules and
        /// the active ranking rules, in that order.
        /// </summary>
        /// <param name="currentIteration">Iteration number forwarded to every rule.</param>
        /// <param name="__wRecord">Site record forwarded to every rule.</param>
        public override void startIteration(int currentIteration, modelSpiderSiteRecord __wRecord)
        {
            foreach (var activeLayerRule in layerActiveRules)
            {
                activeLayerRule.startIteration(currentIteration, __wRecord);
            }

            foreach (var passiveLayerRule in layerPassiveRules)
            {
                passiveLayerRule.startIteration(currentIteration, __wRecord);
            }

            foreach (var activeRankingRule in rankingTargetActiveRules)
            {
                activeRankingRule.startIteration(currentIteration, __wRecord);
            }
        }
예제 #22
0
        /// <summary>
        /// Creates a web loading task that contains exactly one link.
        /// </summary>
        /// <param name="lnk">The link to place in the task.</param>
        /// <param name="sReport">Site record whose <c>web</c> is handed to the task constructor.</param>
        /// <param name="iteration">Iteration number handed to the task constructor.</param>
        /// <returns>The newly created task holding <paramref name="lnk"/>.</returns>
        public virtual spiderTask getSpiderSingleTask(spiderLink lnk, modelSpiderSiteRecord sReport, int iteration)
        {
            spiderTask singleLinkTask = new spiderTask(iteration, sReport.web);
            singleLinkTask.Add(lnk);
            return singleLinkTask;
        }
예제 #23
0
        /// <summary>
        /// Reports the objective as reached when the last time-series entry has
        /// <c>st_detected_p</c> at or above <c>treshold</c>.
        /// </summary>
        /// <param name="sRecord">Not read by this implementation; the <c>wRecord</c> member is used instead.</param>
        /// <param name="resources">Not read by this implementation.</param>
        /// <returns>
        /// A solution built from <c>objective</c> and <c>afirmative</c>; null when the threshold is not met
        /// or when no <c>dataUnitSpiderIteration</c> entry is available.
        /// </returns>
        public override spiderObjectiveSolution evaluate(modelSpiderSiteRecord sRecord, params object[] resources)
        {
            dataUnitSpiderIteration last = wRecord.timeseries.lastEntry as dataUnitSpiderIteration;

            // The "as" cast yields null when there is no last entry or it has another type. Without this guard
            // the property read below throws NullReferenceException; "no solution" is returned instead.
            if (last == null)
            {
                return null;
            }

            if (last.st_detected_p >= treshold)
            {
                return new spiderObjectiveSolution(objective, afirmative);
            }

            return null;
        }
예제 #24
0
 /// <summary>
 /// Adds (via the site folder's <c>Add</c>) the folder node for one iteration of a domain.
 /// </summary>
 /// <param name="iteration">Iteration number; formatted with "D3" for the folder name and caption.</param>
 /// <param name="wRecord">Site record selecting the entry of <c>siteRecords</c> and supplying the domain names.</param>
 /// <returns>The node returned by <c>Add</c>, or null when <c>siteRecords</c> is null.</returns>
 public folderNode getIterationFolder(int iteration, modelSpiderSiteRecord wRecord)
 {
     if (siteRecords == null)
     {
         return null;
     }

     string paddedIteration = iteration.ToString("D3");

     return siteRecords[wRecord].Add(
         "I" + paddedIteration,
         wRecord.domainInfo.domainRootName + paddedIteration,
         "Iteration " + iteration + " on domain: " + wRecord.domainInfo.domainName + ". " + iterationDescription);
 }
예제 #25
0
 /// <summary>
 /// Writes the iteration output: the last entry of <c>generalRecords</c> goes to "modules_performance.xml",
 /// and the last entry of every module record in <c>wRecord.frontierDLC</c> goes to "module_[moduleName].xml".
 /// </summary>
 /// <param name="wRecord">Site record whose <c>frontierDLC</c> module records are saved.</param>
 /// <param name="fn">Folder node that resolves the output paths.</param>
 public void reportIterationOut(modelSpiderSiteRecord wRecord, folderNode fn)
 {
     // NOTE(review): unlike the per-module entries below, this last entry is not null-checked -- confirm it cannot be null.
     generalRecords.GetLastEntry().saveObjectToXML(fn.pathFor("modules_performance.xml"));

     foreach (moduleDLCRecord moduleRecord in wRecord.frontierDLC)
     {
         var latestEntry = moduleRecord.GetLastEntry();
         if (latestEntry == null)
         {
             // Module has no entry: nothing to save.
             continue;
         }

         latestEntry.saveObjectToXML(fn.pathFor("module_" + moduleRecord.moduleName + ".xml"));
     }
 }
예제 #26
0
        /// <summary>
        /// Logs the stage header (name, description, iteration limits) to the record's log builder,
        /// then prepares every objective and logs a line for each of them.
        /// </summary>
        /// <param name="wRecord">Site record whose <c>logBuilder</c> receives the messages.</param>
        /// <param name="sEvaluator">Not read by this implementation.</param>
        public void EnterStage(modelSpiderSiteRecord wRecord, ISpiderEvaluatorBase sEvaluator)
        {
            var stageLog = wRecord.logBuilder;

            stageLog.log("-- entering stage [" + name + "] with " + objectives.Count() + " objectives.");
            stageLog.log("> " + description + " (codename:" + codename + ")");
            stageLog.log("> stage iteration limit: " + stageIterationLimit + " (global limit:" + GLOBAL_stageIterationLimit + ")");

            foreach (spiderObjective stageObjective in objectives)
            {
                stageObjective.prepare();
                stageLog.log("> Objective [" + stageObjective.name + "] t:" + stageObjective.supervisor + " ");
            }
        }
예제 #27
0
        /// <summary>
        /// Answers affirmatively when <c>avg_score_l_trend</c> of the current time-series entry is below
        /// the negated <c>treshold</c>, and with a denial otherwise.
        /// </summary>
        /// <param name="sRecord">Not read by this implementation; the <c>wRecord</c> member is used instead.</param>
        /// <param name="resources">Not read by this implementation.</param>
        /// <returns>A solution for <c>objective</c> carrying either <c>afirmative</c> or <c>denial</c>.</returns>
        public override spiderObjectiveSolution evaluate(modelSpiderSiteRecord sRecord, params object[] resources)
        {
            // NOTE(review): the result of the "as" cast is dereferenced without a null check -- confirm the
            // current entry is always a dataUnitSpiderIteration.
            dataUnitSpiderIteration current = wRecord.timeseries.currentEntry as dataUnitSpiderIteration;

            if (current.avg_score_l_trend < -treshold)
            {
                return new spiderObjectiveSolution(objective, afirmative);
            }

            return new spiderObjectiveSolution(objective, denial);
        }
예제 #28
0
        /// <summary>
        /// Initializes the collection for a site record: sets name and description from the domain and
        /// spider name, creates the link-token document set and, when the <c>doEnableDLC_TFIDF</c> setting
        /// is on, the page-token document set.
        /// </summary>
        /// <param name="__wRecord">Site record stored in <c>wRecord</c>; supplies the domain, spider name and settings.</param>
        public spiderTargetCollection(modelSpiderSiteRecord __wRecord)
        {
            wRecord = __wRecord;

            string siteDomain = wRecord.domain;
            string crawlerName = wRecord.spider.name;

            // Fragments shared by the texts composed below.
            string hashTail = siteDomain + " " + crawlerName;
            string byCrawler = " by the " + crawlerName + " crawler";

            name = crawlerName + " targets on " + siteDomain;
            description = "Registry of unique absolute URLs discovered on the web site: " + siteDomain + byCrawler;

            dlTargetLinkTokens = new termDocumentSet(
                GetHash("links_" + hashTail),
                "URL and anchor text tokens from links discovered on the web site: " + siteDomain + byCrawler);

            if (wRecord.tRecord.instance.settings.doEnableDLC_TFIDF)
            {
                dlTargetPageTokens = new termDocumentSet(
                    GetHash("pages_" + hashTail),
                    "Content text tokens from loaded pages the web site: " + siteDomain + byCrawler);
            }
        }
예제 #29
0
        /// <summary>
        /// Feeds a link for every page of an index domain to the record's context, using a
        /// spider page built from the domain URL as the second argument of <c>processLink</c>.
        /// </summary>
        /// <param name="wRecord">Site record whose <c>context</c> processes the links.</param>
        /// <param name="domain">Index domain supplying the page set and the domain URL.</param>
        public void SetActiveTargets(modelSpiderSiteRecord wRecord, indexDomain domain)
        {
            List<indexPage> domainPages = domain.getPageSet();

            // Page object built from the domain URL; it is shared by all processed links.
            crawledPage domainCrawledPage = new crawledPage(domain.url, 0);
            spiderPage domainSpiderPage = new spiderPage(domainCrawledPage, 0, 0);

            foreach (indexPage indexedPage in domainPages)
            {
                link pageLink = new link(indexedPage.url);
                wRecord.context.processLink(pageLink, domainSpiderPage, false);
            }
        }
예제 #30
0
 /// <summary>
 /// Signals the start of an iteration to every rule in <c>rules</c> that is either an
 /// <c>IRuleActiveBase</c> or a <c>layerDistributionRuleBase</c>; other rules are skipped.
 /// </summary>
 /// <param name="currentIteration">Iteration number forwarded to the rules.</param>
 /// <param name="__wRecord">Site record forwarded to the rules.</param>
 public virtual void startIteration(int currentIteration, modelSpiderSiteRecord __wRecord)
 {
     foreach (IRuleBase rule in rules)
     {
         // The active-rule interface is tested first; a rule matching it is not tested further.
         IRuleActiveBase activeRule = rule as IRuleActiveBase;
         if (activeRule != null)
         {
             activeRule.startIteration(currentIteration, __wRecord);
             continue;
         }

         layerDistributionRuleBase distributionRule = rule as layerDistributionRuleBase;
         if (distributionRule != null)
         {
             distributionRule.startIteration(currentIteration, __wRecord);
         }
     }
 }