Beispiel #1
0
        /// <summary>
        /// Handles the end of a crawl job: gathers the iteration timelines of the child records,
        /// optionally writes the module (frontier) reports, and then records and reports the
        /// benchmark and performance entries of the finished job.
        /// </summary>
        /// <param name="__machine">Task machine of the finished job; supplies the report folder (<c>reporter.folder</c>) and <c>maxThreads</c>.</param>
        /// <param name="tRecord">Test record of the finished job; source of the child records, performance data, resource measurements and crawler settings.</param>
        public override void eventCrawlJobFinished(crawlerDomainTaskMachine __machine, modelSpiderTestRecord tRecord)
        {
            // Remember the report folder of this job in a member that is declared outside this method.
            crawlerReportFolder = __machine.reporter.folder;

            var    wRecords   = tRecord.GetChildRecords();
            string fileprefix = tRecord.instance.name.getFilename();

            // One iteration table per child record; aggregated into a single timeline further below.
            List <DataTable> iterationTimelines = new List <DataTable>();

            foreach (var wRecord in wRecords)
            {
                iterationTimelines.Add(wRecord.iterationTableRecord.GetDataTable());
            }
            // Number of child records: counted in the loop below when module reports are enabled,
            // otherwise taken from tRecord.children in the else-branch.
            int dlc_c = 0;

            // Module reports are produced only when the DR_ReportModules setting is switched on.
            if (imbWEMManager.settings.directReportEngine.DR_ReportModules)
            {
                // The overview and all-modules tables are saved unconditionally.
                tRecord.frontierDLCDataTables[moduleIterationRecordSummary.fra_overview].GetAggregatedTable("fra_overview").GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "fra_overview".add(fileprefix, "_"), true);

                tRecord.frontierDLCDataTables[moduleIterationRecordSummary.all].GetAggregatedTable("fra_modules_all").GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "fra_modules_all".add(fileprefix, "_"), true);


                // The per-module tables are saved only when at least one table exists for that module.
                // NOTE(review): the names below are not uniform - table names mix "fra_module_" and
                // "fra_modules_", and the template file name lacks the trailing "_" that the
                // language/structure/diversity names carry. Confirm whether this is intentional.
                if (tRecord.frontierDLCDataTables[moduleIterationRecordSummary.language].Any())
                {
                    tRecord.frontierDLCDataTables[moduleIterationRecordSummary.language].GetAggregatedTable("fra_module_language").GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "fra_module_language_".add(fileprefix, "_"), true);
                }
                if (tRecord.frontierDLCDataTables[moduleIterationRecordSummary.structure].Any())
                {
                    tRecord.frontierDLCDataTables[moduleIterationRecordSummary.structure].GetAggregatedTable("fra_modules_structure").GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "fra_module_structure_".add(fileprefix, "_"), true);
                }
                if (tRecord.frontierDLCDataTables[moduleIterationRecordSummary.template].Any())
                {
                    tRecord.frontierDLCDataTables[moduleIterationRecordSummary.template].GetAggregatedTable("fra_modules_template").GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "fra_module_template".add(fileprefix, "_"), true);
                }
                if (tRecord.frontierDLCDataTables[moduleIterationRecordSummary.diversity].Any())
                {
                    tRecord.frontierDLCDataTables[moduleIterationRecordSummary.diversity].GetAggregatedTable("fra_module_diversity").GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "fra_module_diversity_".add(fileprefix, "_"), true);
                }


                // Object table that receives one moduleFinalOverview row per active module plus a sum row.
                string finalOverviewPath = crawlerReportFolder.pathFor("fra_modules_impact".add(fileprefix, "_"), getWritableFileMode.newOrExisting);
                objectTable <moduleFinalOverview> finalOverview = new objectTable <moduleFinalOverview>(finalOverviewPath, false, "ModuleName", "module_impact");
                finalOverview.description = "Aggregate (DLC and iterations) metrics on modules' impact to the result.";

                // Group the module iteration records of all child records by module, and keep the
                // distinct modules in first-seen order.
                aceDictionarySet <moduleIterationRecordSummary, moduleIterationRecord> moduleIterationsByModule = new aceDictionarySet <moduleIterationRecordSummary, moduleIterationRecord>();
                List <moduleIterationRecordSummary> moduleActive = new List <moduleIterationRecordSummary>();

                foreach (var wRecord in wRecords)
                {
                    dlc_c++;
                    foreach (var pair in wRecord.frontierDLC.modRecords)
                    {
                        moduleIterationsByModule.Add(pair.Value.moduleSummaryEnum, pair.Value.GetList());
                        if (!moduleActive.Contains(pair.Value.moduleSummaryEnum))
                        {
                            moduleActive.Add(pair.Value.moduleSummaryEnum);
                        }
                    }
                }

                // Build one overview row per active module; modC accumulates the number of
                // iteration records that went into the rows.
                int modC = 0;
                List <moduleFinalOverview> modList = new List <moduleFinalOverview>();
                foreach (var modType in moduleActive)
                {
                    moduleFinalOverview mfo = new moduleFinalOverview();
                    mfo.deploy(tRecord.instance.name, modType, moduleIterationsByModule[modType], dlc_c);
                    modC += moduleIterationsByModule[modType].Count;
                    finalOverview.AddOrUpdate(mfo);
                    modList.Add(mfo);
                }

                // Sum row built from all per-module rows.
                moduleFinalOverview mfoSum = new moduleFinalOverview();


                mfoSum.deploySum(tRecord.instance.name, modList);
                finalOverview.AddOrUpdate(mfoSum);

                // Stamp each module row with the test ID/signature, then push it to finalOverview
                // a second time and to the record_moduleImpact member.
                // NOTE(review): each row was already added to finalOverview in the loop above;
                // presumably the second AddOrUpdate refreshes the stored row - confirm.
                foreach (var mfo in modList)
                {
                    mfo.SetTestIDAndSignature(tRecord.instance, imbWEMManager.index.experimentEntry.state, tRecord);
                    finalOverview.AddOrUpdate(mfo);

                    record_moduleImpact.AddOrUpdate(mfo);
                }


                // NOTE(review): unlike the per-module rows, the sum row is not re-added to
                // finalOverview after being stamped - confirm this asymmetry is intended.
                mfoSum.SetTestIDAndSignature(tRecord.instance, imbWEMManager.index.experimentEntry.state, tRecord);
                record_moduleImpact.AddOrUpdate(mfoSum);

                //    finalOverview.SaveAs(finalOverviewPath.add(".xml"));
                DataTable fover = finalOverview.GetDataTable(null, mfoSum.Crawler);

                fover.SetAggregationOriginCount(modC);
                fover.SetAggregationAspect(dataPointAggregationAspect.onTableMultiRow);
                fover.GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "fra_modules_impact_overview", true);


                // Persist the module impact records and write their report into the session report folder.
                record_moduleImpact.Save();
                var midt = record_moduleImpact.GetDataTable(null, "Module impacts");
                midt.AddExtra("The last benchmark metrics entry [" + imbWEMManager.index.experimentEntry.CrawlID + "] inserted on " + DateTime.Now.ToLongDateString() + " / " + DateTime.Now.ToLongTimeString());
                midt.GetReportAndSave(imbWEMManager.index.experimentEntry.sessionReportFolder, imbWEMManager.authorNotation, "fra_modules_impact_".add(fileprefix, "_"));
            }
            else
            {
                // Module reports disabled: the child-record count still feeds the "Sample size - DC" entry below.
                dlc_c = tRecord.children.Count();
            }

            // Aggregate the collected iteration tables into one timeline report (skipped when there are none).
            if (iterationTimelines.Any())
            {
                DataTable crawlTimeline = iterationTimelines.GetAggregatedTable("Crawler_Timeline", dataPointAggregationAspect.overlapMultiTable);
                crawlTimeline.SetDescription("Iteration-synced aggregated performance timeline using DLC records [" + wRecords.Count + "] domains.");
                crawlTimeline.GetReportAndSave(imbWEMManager.index.experimentEntry.sessionCrawlerFolder, imbWEMManager.authorNotation, "timeline_performance_".add(imbWEMManager.index.experimentEntry.Crawler));
            }
            //String atl = "timeline_performance".add(tRecord.instance.name, "_").add("xml", ".");

            // Entries of the last-domain-iteration table; averaged into the benchmark metrics below.
            var domainPerfList = tRecord.lastDomainIterationTable.GetList();

            var benchmark = new reportPlugIn_benchmarkResults(); //records.GetOrCreate(imbWEMManager.index.experimentEntry.TestID);

            // Stamp and store the performance record of this job.
            tRecord.performance.SetTestIDAndSignature(tRecord.instance, imbWEMManager.index.experimentEntry.state, tRecord);

            tRecord.performance.jobTimeInMinutes = tRecord.cpuTaker.GetTimeSpanInMinutes();

            record_performances.AddOrUpdate(tRecord.performance);

            benchmark.SetTestIDAndSignature(tRecord.instance, imbWEMManager.index.experimentEntry.state, tRecord);


            benchmark.CrawlTime = tRecord.cpuTaker.GetTimeSpanInMinutes(); //tRecord.cpuTaker.GetTimeSpan().TotalMinutes; //.timeFinish.Subtract(tRecord.timeStart).TotalMinutes;


            // Each benchmark metric is the average of the corresponding value over domainPerfList.
            // NOTE(review): if Average here is LINQ's Enumerable.Average over non-nullable values,
            // it throws InvalidOperationException on an empty list - confirm the list cannot be empty.
            benchmark.IP           = domainPerfList.Average(x => x.IP);
            benchmark.IPnominal    = domainPerfList.Average(x => x.IPnominal);
            benchmark.IP_collected = domainPerfList.Average(x => x.IP_collected);
            benchmark.Lm_collected = domainPerfList.Average(x => x.Lm_collected);
            benchmark.Lm_recall    = domainPerfList.Average(x => x.Lm_recall);
            benchmark.E_PP         = domainPerfList.Average(x => x.E_PP);
            benchmark.E_TP         = domainPerfList.Average(x => x.E_TP);
            benchmark.IP_recall    = domainPerfList.Average(x => x.IP_recall);
            benchmark.Page_recall  = domainPerfList.Average(x => x.Page_recall);
            benchmark.Term_recall  = domainPerfList.Average(x => x.Term_recall);

            // Resource usage: CPU and RAM are averaged over up to the last 1000 samples;
            // DataLoad is the total bytes of the last take divided by the crawl time.
            var resourcesamples = tRecord.measureTaker.GetLastSamples(1000);
            var lastsample      = tRecord.measureTaker.GetLastTake();

            // NOTE(review): divides by CrawlTime without a zero check, and dereferences lastsample
            // without a null check - confirm both are guaranteed by the takers.
            benchmark.DataLoad = lastsample.bytesLoadedTotal / benchmark.CrawlTime;
            benchmark.CPU      = resourcesamples.Average(x => x.cpuRateOfProcess);
            benchmark.RAM      = resourcesamples.Average(x => x.physicalMemory);

            // Persist the benchmark entry and build the "result" report from all stored records.
            records.AddOrUpdate(benchmark);
            records.Save();

            var dt = records.GetDataTable(null, imbWEMManager.index.experimentEntry.CrawlID);

            dt.AddExtra("The last benchmark metrics entry [" + benchmark.Crawler + "] inserted on " + DateTime.Now.ToLongDateString() + " / " + DateTime.Now.ToLongTimeString());

            // Additional info entries attached to the result table: thread count, crawler limits,
            // child-record count and session ID.
            dt.SetAdditionalInfoEntry("DLC Threads - TC", __machine.maxThreads);
            dt.SetAdditionalInfoEntry("LoadTake - LT", tRecord.instance.settings.limitIterationNewLinks);
            dt.SetAdditionalInfoEntry("PageLoads - PL", tRecord.instance.settings.limitTotalPageLoad);
            dt.SetAdditionalInfoEntry("Sample size - DC", dlc_c);
            dt.SetAdditionalInfoEntry("Session ID", imbWEMManager.index.experimentEntry.SessionID);



            dt.GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "result", true);

            // NOTE(review): the relative path uses a hard-coded backslash separator, whereas the
            // settings files below address the same sub-folder via crawlerReportFolder["crawler"].
            benchmark.GetUserManualSaved(crawlerReportFolder.pathFor("crawler\\result.txt"));

            //  crawlTimeline.saveObjectToXML(homeFolder.pathFor(atl));
            //  crawlTimeline.saveObjectToXML(reportFolder.pathFor(atl));

            // Save the settings objects (report engine, crawl job engine, execution log, crawler)
            // as text files in the "crawler" sub-folder of the report folder.

            imbWEMManager.settings.directReportEngine.GetUserManualSaved(crawlerReportFolder["crawler"].pathFor("settings_reportEngine.txt"));
            imbWEMManager.settings.crawlerJobEngine.GetUserManualSaved(crawlerReportFolder["crawler"].pathFor("settings_crawlJobEngine.txt"));
            imbWEMManager.settings.executionLog.GetUserManualSaved(crawlerReportFolder["crawler"].pathFor("settings_executionLogs.txt"));

            tRecord.instance.settings.GetUserManualSaved(crawlerReportFolder["crawler"].pathFor("settings_crawler.txt"));
            // Persist the performance records and write their report into the session report folder.
            record_performances.Save();
            var perfDT = record_performances.GetDataTable(null, imbWEMManager.index.experimentEntry.CrawlID);

            perfDT.AddExtra("The last benchmark metrics entry [" + benchmark.Crawler + "] inserted on " + DateTime.Now.ToLongDateString() + " / " + DateTime.Now.ToLongTimeString());

            perfDT.GetReportAndSave(imbWEMManager.index.experimentEntry.sessionReportFolder, imbWEMManager.authorNotation, "crawl_performances", true);
        }
Beispiel #2
0
        //public void deploy(performanceDataLoad dataLoadTaker, performanceCpu cpuTaker, Int32 pageLoadOverride = -1)
        //{



        //}

        /// <summary>
        /// Fills the metrics of this record from a finished test record: recovered terms and blocks,
        /// page-load counters, iteration timing ratios, job time, CPU average and data-load figures.
        /// </summary>
        /// <remarks>
        /// Ratios whose denominator is zero (no iterations, zero job time, zero page loads) are set
        /// to 0 instead of NaN/Infinity. When no domains were loaded, the per-domain and
        /// per-page-load metrics are left untouched and the method returns early.
        /// </remarks>
        /// <param name="tRecord">Test record to read from. The default of <c>null</c> is kept for signature compatibility only; a record is required.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="tRecord"/> is <c>null</c>.</exception>
        public void deploy(modelSpiderTestRecord tRecord = null)
        {
            // The parameter defaults to null but every statement below dereferences it;
            // fail with a descriptive exception instead of a NullReferenceException.
            if (tRecord == null)
            {
                throw new ArgumentNullException(nameof(tRecord));
            }

            List <string> __termsSerbian = new List <string>();
            List <string> __termsOther   = new List <string>();
            List <string> __termsAll     = new List <string>();
            List <string> __blocksAll    = new List <string>();

            domainCrashedList = tRecord.crashedDomains.ToList().toCsvInLine(",");
            domainCrashed     = tRecord.crashedDomains.Count();

            int __pageLoadByIterationRecord = 0;
            int __pageLoadByTargets         = 0;
            int __pageLoadDuplicate         = 0;

            // Sums of the per-child-record average times; divided by __FRATimeTakes afterwards.
            double __FRATimeAvgSum = 0;
            double __ITETimeAvgSum = 0;
            int    __FRATimeTakes  = 0;
            int    __iterations    = 0;

            foreach (var wRecord in tRecord.GetChildRecords())
            {
                __termsSerbian.AddRange(wRecord.context.targets.termSerbian);
                __termsOther.AddRange(wRecord.context.targets.termOther);
                __termsAll.AddRange(wRecord.context.targets.termsAll);

                __pageLoadByTargets += wRecord.context.targets.GetLoaded().Count();

                var lastIter = wRecord.iterationTableRecord.GetLastEntry();
                if (lastIter != null)
                {
                    __pageLoadByIterationRecord += lastIter.loadedPageCount;
                }

                __pageLoadDuplicate += wRecord.duplicateCount;

                foreach (nodeBlock nb in wRecord.context.targets.blocks.GetBlockList())
                {
                    __blocksAll.AddUnique(nb.textHash);
                }

                double __FRATimeSum = 0;
                double __ITETimeSum = 0;
                int    __itCount    = 0;
                foreach (iterationPerformanceRecord iteration in wRecord.iterationTableRecord)
                {
                    __FRATimeSum += iteration.FRA_SummaryRuntime;
                    __ITETimeSum += iteration.time_duration_s;
                    __itCount++;
                    __iterations++;
                }

                // A child record without iterations has no average; 0.0 / 0 would be NaN and
                // would poison every aggregated time metric, so it is left out of the averages.
                if (__itCount == 0)
                {
                    continue;
                }

                __FRATimeAvgSum += __FRATimeSum / (double)__itCount;
                __ITETimeAvgSum += __ITETimeSum / (double)__itCount;
                __FRATimeTakes++;
            }

            Iterations = __iterations;

            // Turn the sums into averages over the child records that contributed iterations.
            if (__FRATimeTakes > 0)
            {
                __FRATimeAvgSum = __FRATimeAvgSum / (double)__FRATimeTakes;
                __ITETimeAvgSum = __ITETimeAvgSum / (double)__FRATimeTakes;
            }

            // Share of the average iteration time spent in FRA versus the remainder.
            if (__ITETimeAvgSum != 0)
            {
                FRA_TimePercent              = __FRATimeAvgSum / __ITETimeAvgSum;
                ContentProcessor_TimePercent = (__ITETimeAvgSum - __FRATimeAvgSum) / __ITETimeAvgSum;
            }
            else
            {
                FRA_TimePercent              = 0;
                ContentProcessor_TimePercent = 0;
            }

            IterationTimeAvg = __ITETimeAvgSum;

            termsRecoveredAll     = __termsAll.Count;
            termsRecoveredOther   = __termsOther.Count;
            termsRecoveredSerbian = __termsSerbian.Count;

            blocksRecovered = __blocksAll.Count;

            TimeSpan timeSpan = tRecord.cpuTaker.GetTimeSpan();
            double   minutes  = timeSpan.TotalMinutes;

            jobTimeInMinutes = minutes;

            relevantPageLoads = tRecord.relevantPages.Count;

            pageLoads                 = __pageLoadByTargets;
            pageLoadDuplicate         = __pageLoadDuplicate;
            pageLoadByIterationRecord = __pageLoadByIterationRecord;

            pageLoadsReal = tRecord.dataLoadTaker.pageLoads;

            domainsLoaded = tRecord.aRecord.sample.Count();

            cpuAverage = tRecord.cpuTaker.GetAverage();

            loadTotal   = tRecord.dataLoadTaker.GetLastTake().reading;
            loadAverage = minutes != 0 ? loadTotal / minutes : 0;

            // Without loaded domains the per-domain metrics are undefined; leave them as they are.
            if (domainsLoaded == 0)
            {
                return;
            }

            // 1048576 = 1024 * 1024, so this is megabytes provided loadTotal is a byte count
            // (the unit of "reading" is not visible here - TODO confirm).
            double loadTotalMb = loadTotal / (double)1048576;

            loadMbPerMinute = minutes != 0 ? loadTotalMb / minutes : 0;

            dataLoadPerDomain = loadTotal / (double)domainsLoaded;

            pageLoadsPerDomain = (double)pageLoads / (double)domainsLoaded;

            // Evaluates to 0 when no relevant pages were loaded.
            relevantPagePerDomain = (double)relevantPageLoads / (double)domainsLoaded;

            timePerDomain = jobTimeInMinutes / ((double)domainsLoaded);

            // Per-page-load ratios are only defined when at least one page was loaded.
            if (pageLoads > 0)
            {
                relevantVsLoadedAverage = ((double)relevantPageLoads / (double)pageLoads);
                termsPerPageLoads       = ((double)termsRecoveredAll) / ((double)pageLoads);
                blocksPerPageLoads      = ((double)blocksRecovered) / ((double)pageLoads);
            }
            else
            {
                relevantVsLoadedAverage = 0;
                termsPerPageLoads       = 0;
                blocksPerPageLoads      = 0;
            }
        }