/// <summary>
/// Runs when a crawl job has finished: writes the optional per-module reports, the aggregated
/// iteration timeline, the benchmark result table, the crawler/engine settings dumps and the
/// crawl performance table.
/// </summary>
/// <param name="__machine">Task machine of the finished job; supplies the report folder and the thread count.</param>
/// <param name="tRecord">Test record of the finished job; supplies child records, measurements and settings.</param>
public override void eventCrawlJobFinished(crawlerDomainTaskMachine __machine, modelSpiderTestRecord tRecord)
{
    crawlerReportFolder = __machine.reporter.folder;

    var wRecords = tRecord.GetChildRecords();
    string fileprefix = tRecord.instance.name.getFilename();

    // Collect one iteration table per child record; they are aggregated into the timeline report below.
    List<DataTable> iterationTimelines = new List<DataTable>();
    foreach (var wRecord in wRecords)
    {
        iterationTimelines.Add(wRecord.iterationTableRecord.GetDataTable());
    }

    // Number of child records; reported as "Sample size - DC" in the result table.
    int dlc_c = 0;

    if (imbWEMManager.settings.directReportEngine.DR_ReportModules)
    {
        // The overview and "all modules" tables are always reported; the per-module tables only when they hold data.
        tRecord.frontierDLCDataTables[moduleIterationRecordSummary.fra_overview].GetAggregatedTable("fra_overview").GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "fra_overview".add(fileprefix, "_"), true);
        tRecord.frontierDLCDataTables[moduleIterationRecordSummary.all].GetAggregatedTable("fra_modules_all").GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "fra_modules_all".add(fileprefix, "_"), true);

        if (tRecord.frontierDLCDataTables[moduleIterationRecordSummary.language].Any())
        {
            tRecord.frontierDLCDataTables[moduleIterationRecordSummary.language].GetAggregatedTable("fra_module_language").GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "fra_module_language_".add(fileprefix, "_"), true);
        }

        if (tRecord.frontierDLCDataTables[moduleIterationRecordSummary.structure].Any())
        {
            tRecord.frontierDLCDataTables[moduleIterationRecordSummary.structure].GetAggregatedTable("fra_modules_structure").GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "fra_module_structure_".add(fileprefix, "_"), true);
        }

        if (tRecord.frontierDLCDataTables[moduleIterationRecordSummary.template].Any())
        {
            tRecord.frontierDLCDataTables[moduleIterationRecordSummary.template].GetAggregatedTable("fra_modules_template").GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "fra_module_template".add(fileprefix, "_"), true);
        }

        if (tRecord.frontierDLCDataTables[moduleIterationRecordSummary.diversity].Any())
        {
            tRecord.frontierDLCDataTables[moduleIterationRecordSummary.diversity].GetAggregatedTable("fra_module_diversity").GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "fra_module_diversity_".add(fileprefix, "_"), true);
        }

        string finalOverviewPath = crawlerReportFolder.pathFor("fra_modules_impact".add(fileprefix, "_"), getWritableFileMode.newOrExisting);
        objectTable<moduleFinalOverview> finalOverview = new objectTable<moduleFinalOverview>(finalOverviewPath, false, "ModuleName", "module_impact");
        finalOverview.description = "Aggregate (DLC and iterations) metrics on modules' impact to the result.";

        // Group the module iteration records of all child records by module kind,
        // remembering each kind in first-seen order.
        aceDictionarySet<moduleIterationRecordSummary, moduleIterationRecord> moduleIterationsByModule = new aceDictionarySet<moduleIterationRecordSummary, moduleIterationRecord>();
        List<moduleIterationRecordSummary> moduleActive = new List<moduleIterationRecordSummary>();

        foreach (var wRecord in wRecords)
        {
            dlc_c++;
            foreach (var pair in wRecord.frontierDLC.modRecords)
            {
                moduleIterationsByModule.Add(pair.Value.moduleSummaryEnum, pair.Value.GetList());
                if (!moduleActive.Contains(pair.Value.moduleSummaryEnum))
                {
                    moduleActive.Add(pair.Value.moduleSummaryEnum);
                }
            }
        }

        // Build one overview row per active module kind, counting the iteration records behind them.
        int modC = 0;
        List<moduleFinalOverview> modList = new List<moduleFinalOverview>();

        foreach (var modType in moduleActive)
        {
            moduleFinalOverview mfo = new moduleFinalOverview();
            mfo.deploy(tRecord.instance.name, modType, moduleIterationsByModule[modType], dlc_c);
            modC += moduleIterationsByModule[modType].Count;
            finalOverview.AddOrUpdate(mfo);
            modList.Add(mfo);
        }

        // Summary row over all module rows.
        moduleFinalOverview mfoSum = new moduleFinalOverview();
        mfoSum.deploySum(tRecord.instance.name, modList);
        finalOverview.AddOrUpdate(mfoSum);

        // Stamp each row with the test identity, then store it in both the per-crawl overview
        // and the module impact table.
        foreach (var mfo in modList)
        {
            mfo.SetTestIDAndSignature(tRecord.instance, imbWEMManager.index.experimentEntry.state, tRecord);
            finalOverview.AddOrUpdate(mfo);
            record_moduleImpact.AddOrUpdate(mfo);
        }

        mfoSum.SetTestIDAndSignature(tRecord.instance, imbWEMManager.index.experimentEntry.state, tRecord);
        record_moduleImpact.AddOrUpdate(mfoSum);

        DataTable fover = finalOverview.GetDataTable(null, mfoSum.Crawler);
        fover.SetAggregationOriginCount(modC);
        fover.SetAggregationAspect(dataPointAggregationAspect.onTableMultiRow);
        fover.GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "fra_modules_impact_overview", true);

        record_moduleImpact.Save();
        var midt = record_moduleImpact.GetDataTable(null, "Module impacts");
        midt.AddExtra(buildLastEntryNote(imbWEMManager.index.experimentEntry.CrawlID));
        midt.GetReportAndSave(imbWEMManager.index.experimentEntry.sessionReportFolder, imbWEMManager.authorNotation, "fra_modules_impact_".add(fileprefix, "_"));
    }
    else
    {
        // Module reporting is off: take the sample size straight from the child collection.
        dlc_c = tRecord.children.Count();
    }

    if (iterationTimelines.Any())
    {
        DataTable crawlTimeline = iterationTimelines.GetAggregatedTable("Crawler_Timeline", dataPointAggregationAspect.overlapMultiTable);
        crawlTimeline.SetDescription("Iteration-synced aggregated performance timeline using DLC records [" + wRecords.Count + "] domains.");
        crawlTimeline.GetReportAndSave(imbWEMManager.index.experimentEntry.sessionCrawlerFolder, imbWEMManager.authorNotation, "timeline_performance_".add(imbWEMManager.index.experimentEntry.Crawler));
    }

    var domainPerfList = tRecord.lastDomainIterationTable.GetList();
    var benchmark = new reportPlugIn_benchmarkResults();

    tRecord.performance.SetTestIDAndSignature(tRecord.instance, imbWEMManager.index.experimentEntry.state, tRecord);
    tRecord.performance.jobTimeInMinutes = tRecord.cpuTaker.GetTimeSpanInMinutes();
    record_performances.AddOrUpdate(tRecord.performance);

    benchmark.SetTestIDAndSignature(tRecord.instance, imbWEMManager.index.experimentEntry.state, tRecord);
    benchmark.CrawlTime = tRecord.cpuTaker.GetTimeSpanInMinutes();

    // Enumerable.Average throws InvalidOperationException on an empty sequence (assuming the metrics
    // are non-nullable numerics), which would abort the handler before the remaining reports are
    // written. With no per-domain entries the benchmark metrics are left at their default values.
    if (domainPerfList.Any())
    {
        benchmark.IP = domainPerfList.Average(x => x.IP);
        benchmark.IPnominal = domainPerfList.Average(x => x.IPnominal);
        benchmark.IP_collected = domainPerfList.Average(x => x.IP_collected);
        benchmark.Lm_collected = domainPerfList.Average(x => x.Lm_collected);
        benchmark.Lm_recall = domainPerfList.Average(x => x.Lm_recall);
        benchmark.E_PP = domainPerfList.Average(x => x.E_PP);
        benchmark.E_TP = domainPerfList.Average(x => x.E_TP);
        benchmark.IP_recall = domainPerfList.Average(x => x.IP_recall);
        benchmark.Page_recall = domainPerfList.Average(x => x.Page_recall);
        benchmark.Term_recall = domainPerfList.Average(x => x.Term_recall);
    }

    var resourcesamples = tRecord.measureTaker.GetLastSamples(1000);
    var lastsample = tRecord.measureTaker.GetLastTake();

    // A zero-length job would make the divisor zero; DataLoad is then left at its default.
    if (benchmark.CrawlTime > 0)
    {
        benchmark.DataLoad = lastsample.bytesLoadedTotal / benchmark.CrawlTime;
    }

    // Same empty-sequence guard as above for the resource samples.
    if (resourcesamples.Any())
    {
        benchmark.CPU = resourcesamples.Average(x => x.cpuRateOfProcess);
        benchmark.RAM = resourcesamples.Average(x => x.physicalMemory);
    }

    records.AddOrUpdate(benchmark);
    records.Save();

    var dt = records.GetDataTable(null, imbWEMManager.index.experimentEntry.CrawlID);
    dt.AddExtra(buildLastEntryNote(benchmark.Crawler));
    dt.SetAdditionalInfoEntry("DLC Threads - TC", __machine.maxThreads);
    dt.SetAdditionalInfoEntry("LoadTake - LT", tRecord.instance.settings.limitIterationNewLinks);
    dt.SetAdditionalInfoEntry("PageLoads - PL", tRecord.instance.settings.limitTotalPageLoad);
    dt.SetAdditionalInfoEntry("Sample size - DC", dlc_c);
    dt.SetAdditionalInfoEntry("Session ID", imbWEMManager.index.experimentEntry.SessionID);
    dt.GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "result", true);

    benchmark.GetUserManualSaved(crawlerReportFolder.pathFor("crawler\\result.txt"));

    // Dump the engine and crawler settings next to the result.
    imbWEMManager.settings.directReportEngine.GetUserManualSaved(crawlerReportFolder["crawler"].pathFor("settings_reportEngine.txt"));
    imbWEMManager.settings.crawlerJobEngine.GetUserManualSaved(crawlerReportFolder["crawler"].pathFor("settings_crawlJobEngine.txt"));
    imbWEMManager.settings.executionLog.GetUserManualSaved(crawlerReportFolder["crawler"].pathFor("settings_executionLogs.txt"));
    tRecord.instance.settings.GetUserManualSaved(crawlerReportFolder["crawler"].pathFor("settings_crawler.txt"));

    record_performances.Save();
    var perfDT = record_performances.GetDataTable(null, imbWEMManager.index.experimentEntry.CrawlID);
    perfDT.AddExtra(buildLastEntryNote(benchmark.Crawler));
    perfDT.GetReportAndSave(imbWEMManager.index.experimentEntry.sessionReportFolder, imbWEMManager.authorNotation, "crawl_performances", true);
}

/// <summary>
/// Builds the "last benchmark metrics entry" note attached to the report tables.
/// The clock is read once so the date and time parts always describe the same instant.
/// </summary>
/// <param name="entryLabel">Identifier placed between the square brackets of the note.</param>
/// <returns>The note text, stamped with the current local date and time.</returns>
private static string buildLastEntryNote(object entryLabel)
{
    DateTime stamp = DateTime.Now;
    return "The last benchmark metrics entry [" + entryLabel + "] inserted on " + stamp.ToLongDateString() + " / " + stamp.ToLongTimeString();
}
/// <summary>
/// Fills this record's aggregate crawl metrics (terms, blocks, page loads, timing, data load and
/// per-domain ratios) from the child records and measurement takers of <paramref name="tRecord"/>.
/// </summary>
/// <param name="tRecord">
/// Test record to summarize. When null (the parameter's default) the method returns without
/// changing anything, instead of failing with a NullReferenceException.
/// </param>
/// <remarks>
/// The per-domain metrics are only computed when at least one domain was loaded; otherwise the
/// method returns after the totals have been assigned. Ratios whose divisor is zero are stored
/// as 0 rather than NaN or Infinity.
/// </remarks>
public void deploy(modelSpiderTestRecord tRecord = null)
{
    if (tRecord == null)
    {
        // The parameter defaults to null, so a bare deploy() call must not dereference it.
        return;
    }

    List<string> __termsSerbian = new List<string>();
    List<string> __termsOther = new List<string>();
    List<string> __termsAll = new List<string>();
    List<string> __blocksAll = new List<string>();

    domainCrashedList = tRecord.crashedDomains.ToList().toCsvInLine(",");
    domainCrashed = tRecord.crashedDomains.Count();

    int __pageLoadByIterationRecord = 0;
    int __pageLoadByTargets = 0;
    int __pageLoadDuplicate = 0;

    // Sums of per-record averages; divided by __FRATimeTakes after the loop.
    double __FRATimeAvgSum = 0;
    double __ITETimeAvgSum = 0;
    int __FRATimeTakes = 0;
    int __iterations = 0;

    foreach (var wRecord in tRecord.GetChildRecords())
    {
        double __FRATimeSum = 0;
        double __ITETimeSum = 0;

        __termsSerbian.AddRange(wRecord.context.targets.termSerbian);
        __termsOther.AddRange(wRecord.context.targets.termOther);
        __termsAll.AddRange(wRecord.context.targets.termsAll);

        __pageLoadByTargets += wRecord.context.targets.GetLoaded().Count();

        var lastIter = wRecord.iterationTableRecord.GetLastEntry();
        if (lastIter != null)
        {
            __pageLoadByIterationRecord += lastIter.loadedPageCount;
        }

        __pageLoadDuplicate += wRecord.duplicateCount;

        // Blocks are collected by text hash, without duplicates across records.
        foreach (nodeBlock nb in wRecord.context.targets.blocks.GetBlockList())
        {
            __blocksAll.AddUnique(nb.textHash);
        }

        int __itCount = 0;
        foreach (iterationPerformanceRecord iteration in wRecord.iterationTableRecord)
        {
            __FRATimeSum += iteration.FRA_SummaryRuntime;
            __ITETimeSum += iteration.time_duration_s;
            __itCount++;
            __iterations++;
        }

        // A record without iterations would contribute 0/0 = NaN and poison every average below,
        // so it is left out of the averages entirely.
        if (__itCount > 0)
        {
            __FRATimeAvgSum += __FRATimeSum / (double)__itCount;
            __ITETimeAvgSum += __ITETimeSum / (double)__itCount;
            __FRATimeTakes++;
        }
    }

    Iterations = __iterations;

    __FRATimeAvgSum = ratioOrZero(__FRATimeAvgSum, __FRATimeTakes);
    __ITETimeAvgSum = ratioOrZero(__ITETimeAvgSum, __FRATimeTakes);

    // Share of the average iteration time spent in FRA, and the remainder.
    FRA_TimePercent = ratioOrZero(__FRATimeAvgSum, __ITETimeAvgSum);
    double __noFRATimeAvgSum = __ITETimeAvgSum - __FRATimeAvgSum;
    ContentProcessor_TimePercent = ratioOrZero(__noFRATimeAvgSum, __ITETimeAvgSum);
    IterationTimeAvg = __ITETimeAvgSum;

    termsRecoveredAll = __termsAll.Count;
    termsRecoveredOther = __termsOther.Count;
    termsRecoveredSerbian = __termsSerbian.Count;
    blocksRecovered = __blocksAll.Count;

    TimeSpan timeSpan = tRecord.cpuTaker.GetTimeSpan();
    jobTimeInMinutes = timeSpan.TotalMinutes;

    relevantPageLoads = tRecord.relevantPages.Count;
    pageLoads = __pageLoadByTargets;
    pageLoadDuplicate = __pageLoadDuplicate;
    pageLoadByIterationRecord = __pageLoadByIterationRecord;
    pageLoadsReal = tRecord.dataLoadTaker.pageLoads;

    domainsLoaded = tRecord.aRecord.sample.Count();

    cpuAverage = tRecord.cpuTaker.GetAverage();
    loadTotal = tRecord.dataLoadTaker.GetLastTake().reading;
    loadAverage = ratioOrZero(loadTotal, timeSpan.TotalMinutes);

    // 1048576 = 1024 * 1024; NOTE(review): this is megabytes only if `reading` is in bytes - confirm.
    double loadTotalMb = loadTotal / (double)1048576;

    if (domainsLoaded == 0)
    {
        // No domain was loaded: the per-domain ratios below are undefined, so they are left untouched.
        return;
    }

    loadMbPerMinute = ratioOrZero(loadTotalMb, timeSpan.TotalMinutes);
    dataLoadPerDomain = loadTotal / (double)domainsLoaded;
    pageLoadsPerDomain = (double)pageLoads / (double)domainsLoaded;

    if (relevantPageLoads == 0)
    {
        relevantPagePerDomain = 0;
        relevantVsLoadedAverage = 0;
    }
    else
    {
        relevantPagePerDomain = (double)relevantPageLoads / (double)domainsLoaded;
    }

    if (pageLoads > 0)
    {
        relevantVsLoadedAverage = ((double)relevantPageLoads / (double)pageLoads);
    }

    timePerDomain = jobTimeInMinutes / ((double)domainsLoaded);
    termsPerPageLoads = ratioOrZero(termsRecoveredAll, pageLoads);
    blocksPerPageLoads = ratioOrZero(blocksRecovered, pageLoads);
}

/// <summary>
/// Divides <paramref name="numerator"/> by <paramref name="denominator"/>, returning 0 when the
/// denominator is exactly zero so the stored metrics never become NaN or Infinity.
/// </summary>
private static double ratioOrZero(double numerator, double denominator)
{
    // An exact comparison is intended here: only a true zero divisor is special-cased.
    return denominator == 0 ? 0 : numerator / denominator;
}