/// <summary>
/// E1: Takes over the result of a completed spider task and fills the iteration statistics of the site record.
/// </summary>
/// <param name="stResult">Result of the spider task that has just been executed.</param>
/// <param name="wRecord">Site record whose context, result list and time series entry are updated.</param>
/// <returns>The time series entry of <paramref name="wRecord"/> for the iteration of the task, with its counters set.</returns>
public dataUnitSpiderIteration operation_receiveResult(spiderTaskResult stResult, modelSpiderSiteRecord wRecord)
{
    var taskIteration = stResult.task.iteration;
    dataUnitSpiderIteration stats = wRecord.timeseries[taskIteration];

    string receivedMessage = "Received: " + stResult.Count() + " (it:" + taskIteration + ")";
    wRecord.logBuilder.log(receivedMessage);

    wRecord.iteration = taskIteration;

    // Empty results are not kept in the site's result history.
    if (stResult.Any())
    {
        wRecord.spiderTaskResults.Add(stResult);
    }

    // Target count before processing; the difference afterwards is reported as the newly detected links.
    int targetsBefore = wRecord.web.webTargets.items.Count();

    int processedLinks = wRecord.context.acceptLoadedPages(stResult);
    int failedLinks = wRecord.context.processLoaderResult(
        stResult,
        settings.FRONTIER_doLinkResolver,
        settings.FRONTIER_doLinkHarvest,
        settings.FRONTIER_harvestNature,
        settings.FRONTIER_harvestScope);

    int detectedLinks = wRecord.web.webTargets.items.Count() - targetsBefore;

    // Per-iteration deltas.
    stats.nw_detected_l = detectedLinks;
    stats.nw_failed_l = failedLinks;

    // Totals read from the site's web model after processing.
    stats.tc_detected_l = wRecord.web.webLinks.items.Count();
    stats.tc_loaded_p = wRecord.web.webPages.items.Count();
    stats.tc_detected_p = wRecord.web.webTargets.items.Count();
    stats.tc_ingame_l = wRecord.web.webActiveLinks.items.Count();

    stats.nw_processed_l = processedLinks;

    string activeMessage = "Active links [" + wRecord.web.webActiveLinks.items.Count() + "] change [" + detectedLinks + "]";
    wRecord.logBuilder.log(activeMessage);

    return stats;
}
/// <summary>
/// Runs the spider task: processes every link of the task and collects the outcomes in a single result.
/// </summary>
/// <param name="sTask">The spider task whose links are processed.</param>
/// <param name="wRecord">Site record that supplies the child page records and the target collection pages are attached to.</param>
/// <returns>
/// The result created by <paramref name="sTask"/>, finished before it is returned. Exceptions raised while
/// processing are logged and not rethrown, so the result may hold fewer items than the task has links.
/// </returns>
public spiderTaskResult runSpiderTask(spiderTask sTask, modelSpiderSiteRecord wRecord)
{
    spiderTaskResult sResult = sTask.createResult();

    try
    {
        if (imbWEMManager.settings.crawlerJobEngine.crawlerDoParallelTaskLoads)
        {
            // NOTE(review): AddResult and AttachPage are invoked concurrently here - confirm both are thread-safe.
            Parallel.ForEach(sTask, ln =>
            {
                modelSpiderPageRecord pRecord = wRecord.getChildRecord(ln, ln.url);
                spiderTaskResultItem rItem = runSpiderTaskItem(ln, sTask.doTokenization, pRecord);

                // Only pages that did not fail are attached to the targets.
                if (rItem.status != pageStatus.failed)
                {
                    wRecord.context.targets.AttachPage(rItem, pRecord.logBuilder, blockCount);
                }

                sResult.AddResult(rItem);
            });
        }
        else
        {
            foreach (spiderLink ln in sTask)
            {
                modelSpiderPageRecord pRecord = wRecord.getChildRecord(ln, ln.url);
                spiderTaskResultItem rItem = runSpiderTaskItem(ln, sTask.doTokenization, pRecord);

                // Only pages that did not fail are attached to the targets.
                if (rItem.status != pageStatus.failed)
                {
                    wRecord.context.targets.AttachPage(rItem, pRecord.logBuilder, blockCount);
                }

                sResult.AddResult(rItem);
            }
        }
    }
    catch (AggregateException aggregate)
    {
        // Parallel.ForEach wraps worker failures in an AggregateException whose own message is generic;
        // log every inner exception so the actual cause is visible.
        foreach (Exception inner in aggregate.Flatten().InnerExceptions)
        {
            imbWEMManager.log.log("runSpiderTask exception: " + inner.Message);
        }
    }
    catch (Exception ex)
    {
        // Best effort: a failure is logged and whatever was collected so far is still returned.
        imbWEMManager.log.log("runSpiderTask exception: " + ex.Message);
    }

    loadIndex = loadIndex + sResult.Count();

    // Force a collection once the number of processed items exceeds the configured threshold.
    if (loadIndex > imbWEMManager.settings.crawlerJobEngine.loadCountForGC)
    {
        long mem = GC.GetTotalMemory(false);

        GC.Collect();
        // NOTE(review): WaitForFullGCComplete reports the status of a registered full-GC notification;
        // no registration is visible here, so it may return immediately - confirm whether
        // GC.WaitForPendingFinalizers() was intended.
        GC.WaitForFullGCComplete();

        long dmem = GC.GetTotalMemory(false);

        aceLog.log("Memory allocation reduction [after " + loadIndex + " tasks]: " + (mem - dmem).getMByteCountFormated());
        loadIndex = 0;
    }

    sResult.finish();

    return sResult;
}