Пример #1
0
        /// <summary>
        /// E1: Takes in the result of a spider task: registers it with the site record, passes it to the
        /// record's crawl context and writes the resulting counters into the data unit of the task's iteration.
        /// </summary>
        /// <param name="stResult">Result of the spider task; <c>stResult.task.iteration</c> selects the data unit.</param>
        /// <param name="wRecord">Site record whose iteration, context, result list and log are updated.</param>
        /// <returns>The entry of <c>wRecord.timeseries</c> for the task's iteration, with its counters filled in.</returns>
        public dataUnitSpiderIteration operation_receiveResult(spiderTaskResult stResult, modelSpiderSiteRecord wRecord)
        {
            dataUnitSpiderIteration iterationUnit = wRecord.timeseries[stResult.task.iteration];

            wRecord.logBuilder.log(string.Format("Received: {0} (it:{1})", stResult.Count(), stResult.task.iteration));

            wRecord.iteration = stResult.task.iteration;

            // Only non-empty results are kept in the record's result list.
            if (stResult.Any())
            {
                wRecord.spiderTaskResults.Add(stResult);
            }

            // Target count before the context processes the result; used to derive the number of new targets.
            int targetsBefore = wRecord.web.webTargets.items.Count();

            int flushedLinks = wRecord.context.acceptLoadedPages(stResult);
            int failedLinks = wRecord.context.processLoaderResult(
                stResult,
                settings.FRONTIER_doLinkResolver,
                settings.FRONTIER_doLinkHarvest,
                settings.FRONTIER_harvestNature,
                settings.FRONTIER_harvestScope);

            int targetsAdded = wRecord.web.webTargets.items.Count() - targetsBefore;

            iterationUnit.nw_detected_l = targetsAdded;
            iterationUnit.nw_failed_l = failedLinks;

            // Totals taken from the record's web collections after processing.
            iterationUnit.tc_detected_l = wRecord.web.webLinks.items.Count();
            iterationUnit.tc_loaded_p = wRecord.web.webPages.items.Count();
            iterationUnit.tc_detected_p = wRecord.web.webTargets.items.Count();
            iterationUnit.tc_ingame_l = wRecord.web.webActiveLinks.items.Count();

            iterationUnit.nw_processed_l = flushedLinks;

            wRecord.logBuilder.log(string.Format("Active links [{0}] change [{1}]", wRecord.web.webActiveLinks.items.Count(), targetsAdded));

            return iterationUnit;
        }
Пример #2
0
        /// <summary>
        /// Runs the spider task: every link of the task is processed by <c>runSpiderTaskItem</c>, pages whose
        /// status is not <c>pageStatus.failed</c> are attached to <c>wRecord.context.targets</c>, and every
        /// item is added to the task result.
        /// </summary>
        /// <remarks>
        /// Links are processed with <c>Parallel.ForEach</c> when
        /// <c>imbWEMManager.settings.crawlerJobEngine.crawlerDoParallelTaskLoads</c> is set, sequentially otherwise.
        /// Exceptions raised while processing are logged and swallowed; the result then holds only the items
        /// added before the failure. Once the accumulated load count exceeds
        /// <c>imbWEMManager.settings.crawlerJobEngine.loadCountForGC</c>, a garbage collection is forced and the
        /// difference in allocated memory is logged.
        /// </remarks>
        /// <param name="sTask">The spider task whose links are loaded.</param>
        /// <param name="wRecord">Site record that supplies the child page records and the crawl context.</param>
        /// <returns>The result created by <c>sTask.createResult()</c>, after <c>finish()</c> was called on it.</returns>
        public spiderTaskResult runSpiderTask(spiderTask sTask, modelSpiderSiteRecord wRecord)
        {
            spiderTaskResult sResult = sTask.createResult();

            try
            {
                if (imbWEMManager.settings.crawlerJobEngine.crawlerDoParallelTaskLoads)
                {
                    // NOTE(review): AttachPage and AddResult are invoked concurrently in this branch;
                    // confirm that both are thread-safe.
                    Parallel.ForEach(sTask, ln =>
                    {
                        modelSpiderPageRecord pRecord = wRecord.getChildRecord(ln, ln.url);

                        spiderTaskResultItem rItem = runSpiderTaskItem(ln, sTask.doTokenization, pRecord);

                        if (rItem.status != pageStatus.failed)
                        {
                            wRecord.context.targets.AttachPage(rItem, pRecord.logBuilder, blockCount);
                        }

                        sResult.AddResult(rItem);
                    });
                }
                else
                {
                    foreach (spiderLink ln in sTask)
                    {
                        modelSpiderPageRecord pRecord = wRecord.getChildRecord(ln, ln.url);

                        spiderTaskResultItem rItem = runSpiderTaskItem(ln, sTask.doTokenization, pRecord);

                        if (rItem.status != pageStatus.failed)
                        {
                            wRecord.context.targets.AttachPage(rItem, pRecord.logBuilder, blockCount);
                        }

                        sResult.AddResult(rItem);
                    }
                }
            }
            catch (AggregateException aex)
            {
                // Parallel.ForEach wraps worker failures in an AggregateException whose own Message is
                // generic; log every underlying exception so the actual cause is visible.
                foreach (Exception inner in aex.Flatten().InnerExceptions)
                {
                    imbWEMManager.log.log("runSpiderTask exception: " + inner.Message);
                }
            }
            catch (Exception ex)
            {
                imbWEMManager.log.log("runSpiderTask exception: " + ex.Message);
            }

            loadIndex = loadIndex + sResult.Count();

            if (loadIndex > imbWEMManager.settings.crawlerJobEngine.loadCountForGC)
            {
                long mem = GC.GetTotalMemory(false);

                // GC.Collect() is already a blocking full collection. WaitForFullGCComplete() only reports on
                // a notification registered via RegisterForFullGCNotification, so it was replaced by a wait
                // for the finalizer queue, which lets finalizable objects release their resources.
                GC.Collect();
                GC.WaitForPendingFinalizers();

                long dmem = GC.GetTotalMemory(false);

                aceLog.log("Memory allocation reduction [after " + loadIndex + " tasks]: " + (mem - dmem).getMByteCountFormated());
                loadIndex = 0;
            }

            sResult.finish();

            return sResult;
        }