Example #1
        /// <summary>Performs post-processing of data collected by the workload plugin</summary>
        /// <remarks><para>Loads all saved DataTables, groups rows into per-measure-group averages and creates a summary table covering all experiments</para></remarks>
        /// <param name="searchPattern">pattern used to select input files</param>
        /// <param name="groupColumn">column name used for row grouping</param>
        /// <param name="overviewColumns">columns to include in overview table</param>
        /// <seealso cref="aceOperationSetExecutorBase"/>
        public void aceOperation_runWorkloadData(
            [Description("pattern used to select input files")] string searchPattern = "results*.xml",
            [Description("column name used for row grouping")] string groupColumn    = "measureGroup")
        // [Description("columns to include in overview table")] String overviewColumns = "DataLoad,CrawlerIterations,ContentPages,dlcMaximum")
        {
            aceOperation_selectFiles(searchPattern, "index\\workload", true);

            folder = folder["index\\workload"];


            dataPointAggregationType aggType = dataPointAggregationType.avg;

            int ci = 1;
            int c  = selectedFiles.Count();

            output.log("[" + c + "] DataTable in the cue.");


            DataSet          dSet      = new DataSet();


            aceDictionarySet <string, DataTable> byCrawler = new aceDictionarySet <string, DataTable>();
            aceDictionarySet <string, DataTableForStatistics> byCrawlerRT = new aceDictionarySet <string, DataTableForStatistics>();

            DataTableForStatistics rt = null;

            foreach (FileInfo fi in selectedFiles)
            {
                try
                {
                    objectTable <reportPlugIn_workloadEntry> workloadEntry = new objectTable <reportPlugIn_workloadEntry>(fi.FullName, true, "EntryID", "");

                    objectTable <reportPlugIn_workloadEntry> workloadGrouped = new objectTable <reportPlugIn_workloadEntry>("EntryID", "aggregated");

                    aceDictionarySet <int, reportPlugIn_workloadEntry> workloadGroups = workloadEntry.GetGroups <int>(groupColumn, "terminationWarning = 0");

                    collectionAggregationResultSet <reportPlugIn_workloadEntry> aggregateSet = new collectionAggregationResultSet <reportPlugIn_workloadEntry>();



                    foreach (var set in workloadGroups)
                    {
                        collectionAggregationResult <reportPlugIn_workloadEntry> aggregates = null;
                        aggregates = set.Value.GetAggregates(aggType);

                        var aggregate = aggregates[aggType];
                        aggregate.measureGroup = set.Key;
                        aggregate.EntryID      = set.Key.ToString("D5") + "_" + aggType.ToString();
                        workloadGrouped.AddOrUpdate(aggregate);
                        aggregateSet.Add(aggregate.EntryID + "_" + fi.Name, aggregates);
                    }


                    string n = reportPlugIn_workload_state.ExtractEntryID(aggregateSet.lastItem.EntryID) + dSet.Tables.Count.ToString("D2");

                    DataTable dt = workloadGrouped.GetDataTable(dSet, n);
                    dt.SetDescription("Collection of [" + aggregateSet.recordType.Name + "] records grouped by [" + groupColumn + "]");
                    dt.SetAggregationAspect(dataPointAggregationAspect.subSetOfRows);
                    dt.SetAggregationOriginCount(aggregateSet.Count);
                    dt.SetAdditionalInfoEntry("Aggregation Type:", aggType);
                    dt.SetAdditionalInfoEntry("Data source file:", fi.Name);

                    dt.SetAdditionalInfoEntries("Last", aggregateSet.lastItem, typeof(string));

                    dt.SetTitle(n);

                    byCrawler.Add(aggregateSet.firstItem.Crawler, dt);

                    // dt.TableName = n;
                    //   dSet.AddTable(dt);


                    rt = dt.GetReportAndSave(folder, imbWEMManager.authorNotation, n.getFilename(), true);
                    byCrawlerRT.Add(aggregateSet.firstItem.Crawler, rt);
                    response.AppendLine("[" + ci + " / " + c + "] DataTable [" + fi.Name + "] had [" + workloadGroups.Keys.Count() + "] groups. Result saved as: " + n.getFilename());
                    ci++;
                } catch (Exception ex)
                {
                    output.log("[" + ci + " / " + c + "] DataTable [" + fi.FullName + "] failed.");
                    output.log(ex.Message);
                }
            }



            output.log("[" + c + "] DataTable processed.");

            dSet.serializeDataSet("workload_all", folder, dataTableExportEnum.excel, imbWEMManager.authorNotation);

            foreach (string key in byCrawler.Keys)
            {
                string  filename = key.getFilename();
                DataSet sd       = new DataSet(key);
                foreach (DataTable dti in byCrawler[key])
                {
                    sd.AddTable(dti.Copy());
                }

                sd.AddTable(byCrawlerRT[key].First().RenderLegend());
                sd.serializeDataSet(filename, folder, dataTableExportEnum.excel, imbWEMManager.authorNotation);
            }
        }
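
The core of this method is the group-then-average step hidden behind objectTable.GetGroups and GetAggregates. Below is a minimal, framework-free sketch of the same idea, using only System.Data and LINQ; the names are illustrative stand-ins, not the plugin's API:

        using System;
        using System.Data;
        using System.Linq;

        static class WorkloadAggregationSketch
        {
            // Groups rows by groupColumn and averages every numeric column within each group.
            public static DataTable AverageByGroup(DataTable source, string groupColumn)
            {
                DataTable result = source.Clone(); // same schema, no rows

                var numericColumns = source.Columns.Cast<DataColumn>()
                                           .Where(c => c.DataType == typeof(double) || c.DataType == typeof(int))
                                           .ToList();

                foreach (var grp in source.Rows.Cast<DataRow>().GroupBy(r => r[groupColumn]))
                {
                    DataRow row = result.NewRow();
                    row[groupColumn] = grp.Key;
                    foreach (DataColumn col in numericColumns)
                    {
                        row[col.ColumnName] = grp.Average(r => Convert.ToDouble(r[col]));
                    }
                    result.Rows.Add(row);
                }
                return result;
            }
        }
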
Example #2
        /// <summary>Exports a domain list according to the specified criteria</summary>
        /// <remarks><para>It selects domains using existing data. If the index name is not specified, it scans the index repository and asks the user to pick one</para></remarks>
        /// <param name="indexName">name of the index to harvest sample from - IndexID</param>
        /// <param name="minPages">required min. number of crawled/indexed pages in the doman--</param>
        /// <param name="minRelevant">required min. number of relevant pages in the index for the domain</param>
        /// <seealso cref="aceOperationSetExecutorBase"/>
        public void aceOperation_runIndexData(
            [Description("name of the index to harvest sample from - IndexID")] string indexName                = "MainIndex",
            [Description("required min. number of crawled/indexed pages in the doman--")] int minPages          = 30,
            [Description("required min. number of relevant pages in the index for the domain")] int minRelevant = 10)
        {
            if ((indexName == "*") || indexName.isNullOrEmpty())
            {
                List <string> indexList = imbWEMManager.index.GetIndexList();
                indexList.Add("*");
                aceTerminalInput.askForOption("Choose index to work with - or confirm * to load all indexes:", "*", indexList);
            }

            indexDatabaseStandalone indexDb = new indexDatabaseStandalone(indexName);



            imbWEMManager.index.OpenIndex(indexName, "plugin_dataLoader");

            imbWEMManager.index.pageIndexTable.ReadOnlyMode   = true;
            imbWEMManager.index.domainIndexTable.ReadOnlyMode = true;
            List <indexDomain> d_list = new List <indexDomain>();

            List <indexPage> pages = imbWEMManager.index.pageIndexTable.GetPagesAndDomains(indexPageEvaluationEntryState.inTheIndex, out d_list);

            aceDictionarySet <indexDomain, indexPage> dict = new aceDictionarySet <indexDomain, indexPage>();
            List <string>      list    = new List <string>();
            List <indexDomain> domains = new List <indexDomain>();

            foreach (indexDomain domain in d_list)
            {
                List <indexPage> pl = pages.Where(x => x.url.Contains(domain.domain)).ToList();
                dict.Add(domain, pl);
                int prc = 0;
                if (pl.Count > minPages)
                {
                    foreach (indexPage ip in pl)
                    {
                        if (ip.relevancyText == "isRelevant")
                        {
                            prc++;
                        }
                        if (prc > minRelevant)
                        {
                            output.AppendLine($" {domain.domain} P[_{pl.Count()}_] Pr[_{prc}_] --> accepted, stop counting");
                            //  domains.Add(domain);
                            list.Add(domain.domain);
                            break;
                        }
                    }
                }
            }

            //  var domains = imbWEMManager.index.domainIndexTable.GetWhere(nameof(indexDomain.relevantPages) + " > " + minRelevant);
            // domains = domains.Where(x => ((x.relevantPages + x.notRelevantPages) > minPages)).ToList();
            string sampleName = indexName.add("Pa" + minPages + "Pr" + minRelevant, "_").add("txt", ".");

            domains.ForEach(x => list.Add(x.url));


            objectTable <indexDomain> dTable = new objectTable <indexDomain>("url", sampleName);

            domains.ForEach(x => dTable.AddOrUpdate(x));

            dTable.GetDataTable(null, sampleName).GetReportAndSave(folder, imbWEMManager.authorNotation, sampleName, true);

            folder = imbWEMManager.index.folder;

            string p = folder.pathFor(sampleName);

            list.saveContentOnFilePath(p);

            output.log("Exported sample saved to: " + p);
        }
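
The selection rule above can be isolated from the index machinery. Here is a hedged sketch with a hypothetical Page stand-in (indexPage exposes url and relevancyText instead): a domain is accepted when it has more than minPages matching pages, of which more than minRelevant are relevant.

        using System.Collections.Generic;
        using System.Linq;

        public record Page(string Url, bool IsRelevant);

        public static class DomainSampleSketch
        {
            public static List<string> SelectDomains(IEnumerable<string> domains,
                                                     IReadOnlyList<Page> pages,
                                                     int minPages, int minRelevant)
            {
                var accepted = new List<string>();
                foreach (string domain in domains)
                {
                    // pages are matched to a domain by URL substring, as in the method above
                    var pl = pages.Where(p => p.Url.Contains(domain)).ToList();
                    if (pl.Count > minPages && pl.Count(p => p.IsRelevant) > minRelevant)
                    {
                        accepted.Add(domain); // the original breaks out early once the threshold is hit
                    }
                }
                return accepted;
            }
        }
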
Example #3
        /// <summary>Recalculates crawl time by importing the exported dt_dataLoad table and updating the performance exports for each crawl</summary>
        /// <remarks><para>It loads the results record of the opened session to find all crawls, then imports each dc_dataload CSV export and sums the sampling periods</para></remarks>
        /// <seealso cref="aceOperationSetExecutorBase"/>
        public void aceOperation_repairRecoverTime()
        {
            var __recordKeyProperty = "TestID";
            var homeFolder          = new folderNode("index\\benchmark", "Home folder of plugin: benchmark", "Internal data for plugin: benchmark");
            var recordName          = imbWEMManager.index.experimentEntry.SessionID;

            var records = new objectTable <reportPlugIn_benchmarkResults>(homeFolder.pathFor(recordName.add("results", "_")), true, __recordKeyProperty, name);

            records.description = "Summary report on the most relevant evaluation metrics.";

            var record_performances = new objectTable <performanceRecord>(homeFolder.pathFor(recordName.add("performances", "_")), true, "TestID", name);

            var record_moduleImpact = new objectTable <moduleFinalOverview>(homeFolder.pathFor(recordName.add("modules", "_")), true, "ModuleName", name);

            // <---- making crawler list
            List <string> crawlerList = new List <string>();

            List <reportPlugIn_benchmarkResults> allRecords = records.GetList();
            var reportFolder = imbWEMManager.index.experimentEntry.sessionReportFolder;

            Dictionary <string, string> pathsForResultExcel = new Dictionary <string, string>();

            Dictionary <string, folderNode> foldersForResultExcel = new Dictionary <string, folderNode>();


            foreach (reportPlugIn_benchmarkResults result in allRecords)
            {
                crawlerList.Add(result.Crawler);
                output.AppendLine("Crawl found: " + result.Crawler);

                string pathCrawlerId = result.Crawler.Replace("-", "");

                folderNode resultNode = reportFolder[pathCrawlerId.ToUpper() + "\\crawler\\data"];

                string pathForData = resultNode.pathFor("dc_dataload_" + result.Crawler.ToLower() + ".csv");
                //String pathForResult = reportFolder.pathFor(pathCrawlerId);

                foldersForResultExcel.Add(result.Crawler, resultNode);

                //foldersForResultExcel.Add(result.Crawler, pathForResult);


                // String path = reportFolder.pathFor(pathForData, getWritableFileMode.existing);
                output.AppendLine("Loading datatable: " + pathForData);

                DataTable dataTable = null;
                dataTable = pathForData.deserializeDataTable(dataTableExportEnum.csv);
                output.AppendLine("DataTable loaded - rows[" + dataTable.Rows.Count + "]");
                DataColumn periodColumn = dataTable.Columns["Period"];
                double     periodSum    = 0;
                foreach (DataRow dr in dataTable.Rows)
                {
                    string read  = dr[periodColumn].ToString().Replace(",", ".");
                    // parse with invariant culture so the normalized decimal point is always honored
                    double readD = double.Parse(read, System.Globalization.CultureInfo.InvariantCulture);
                    periodSum += readD;
                }

                output.AppendLine("Total execution time in seconds: " + periodSum.ToString("F5"));

                result.CrawlTime = periodSum / 60d; // seconds -> minutes
                records.AddOrUpdate(result);
            }

            foreach (reportPlugIn_benchmarkResults result in allRecords)
            {
                folderNode resultFolder = foldersForResultExcel[result.Crawler];

                records.GetDataTable().GetReportAndSave(resultFolder, imbWEMManager.authorNotation, "results_timefix", true);

                output.AppendLine("Repaired result table saved to: " + resultFolder.path);
                // <---- fixing crawler results
            }
        }
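
The recovery arithmetic itself is small: sum the "Period" column (sampling periods in seconds) and convert to minutes. A self-contained sketch of just that step, assuming the CSV is already loaded into a DataTable:

        using System.Data;
        using System.Globalization;

        public static class CrawlTimeSketch
        {
            public static double SumPeriodMinutes(DataTable table)
            {
                double seconds = 0;
                foreach (DataRow row in table.Rows)
                {
                    // normalize decimal commas, then parse culture-independently
                    string raw = row["Period"].ToString().Replace(",", ".");
                    seconds += double.Parse(raw, CultureInfo.InvariantCulture);
                }
                return seconds / 60.0; // CrawlTime is kept in minutes
            }
        }
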
Example #4
        public override void eventCrawlJobFinished(crawlerDomainTaskMachine __machine, modelSpiderTestRecord tRecord)
        {
            crawlerReportFolder = __machine.reporter.folder;

            var    wRecords   = tRecord.GetChildRecords();
            string fileprefix = tRecord.instance.name.getFilename();

            List <DataTable> iterationTimelines = new List <DataTable>();

            foreach (var wRecord in wRecords)
            {
                iterationTimelines.Add(wRecord.iterationTableRecord.GetDataTable());
            }
            int dlc_c = 0;

            if (imbWEMManager.settings.directReportEngine.DR_ReportModules)
            {
                tRecord.frontierDLCDataTables[moduleIterationRecordSummary.fra_overview].GetAggregatedTable("fra_overview").GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "fra_overview".add(fileprefix, "_"), true);

                tRecord.frontierDLCDataTables[moduleIterationRecordSummary.all].GetAggregatedTable("fra_modules_all").GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "fra_modules_all".add(fileprefix, "_"), true);


                if (tRecord.frontierDLCDataTables[moduleIterationRecordSummary.language].Any())
                {
                    tRecord.frontierDLCDataTables[moduleIterationRecordSummary.language].GetAggregatedTable("fra_module_language").GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "fra_module_language".add(fileprefix, "_"), true);
                }
                if (tRecord.frontierDLCDataTables[moduleIterationRecordSummary.structure].Any())
                {
                    tRecord.frontierDLCDataTables[moduleIterationRecordSummary.structure].GetAggregatedTable("fra_modules_structure").GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "fra_module_structure".add(fileprefix, "_"), true);
                }
                if (tRecord.frontierDLCDataTables[moduleIterationRecordSummary.template].Any())
                {
                    tRecord.frontierDLCDataTables[moduleIterationRecordSummary.template].GetAggregatedTable("fra_modules_template").GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "fra_module_template".add(fileprefix, "_"), true);
                }
                if (tRecord.frontierDLCDataTables[moduleIterationRecordSummary.diversity].Any())
                {
                    tRecord.frontierDLCDataTables[moduleIterationRecordSummary.diversity].GetAggregatedTable("fra_module_diversity").GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "fra_module_diversity".add(fileprefix, "_"), true);
                }


                string finalOverviewPath = crawlerReportFolder.pathFor("fra_modules_impact".add(fileprefix, "_"), getWritableFileMode.newOrExisting);
                objectTable <moduleFinalOverview> finalOverview = new objectTable <moduleFinalOverview>(finalOverviewPath, false, "ModuleName", "module_impact");
                finalOverview.description = "Aggregate (DLC and iterations) metrics on modules' impact to the result.";

                aceDictionarySet <moduleIterationRecordSummary, moduleIterationRecord> moduleIterationsByModule = new aceDictionarySet <moduleIterationRecordSummary, moduleIterationRecord>();
                List <moduleIterationRecordSummary> moduleActive = new List <moduleIterationRecordSummary>();

                foreach (var wRecord in wRecords)
                {
                    dlc_c++;
                    foreach (var pair in wRecord.frontierDLC.modRecords)
                    {
                        moduleIterationsByModule.Add(pair.Value.moduleSummaryEnum, pair.Value.GetList());
                        if (!moduleActive.Contains(pair.Value.moduleSummaryEnum))
                        {
                            moduleActive.Add(pair.Value.moduleSummaryEnum);
                        }
                    }
                }

                int modC = 0;
                List <moduleFinalOverview> modList = new List <moduleFinalOverview>();
                foreach (var modType in moduleActive)
                {
                    moduleFinalOverview mfo = new moduleFinalOverview();
                    mfo.deploy(tRecord.instance.name, modType, moduleIterationsByModule[modType], dlc_c);
                    modC += moduleIterationsByModule[modType].Count;
                    finalOverview.AddOrUpdate(mfo);
                    modList.Add(mfo);
                }

                moduleFinalOverview mfoSum = new moduleFinalOverview();


                mfoSum.deploySum(tRecord.instance.name, modList);
                finalOverview.AddOrUpdate(mfoSum);

                foreach (var mfo in modList)
                {
                    mfo.SetTestIDAndSignature(tRecord.instance, imbWEMManager.index.experimentEntry.state, tRecord);
                    finalOverview.AddOrUpdate(mfo);

                    record_moduleImpact.AddOrUpdate(mfo);
                }


                mfoSum.SetTestIDAndSignature(tRecord.instance, imbWEMManager.index.experimentEntry.state, tRecord);
                record_moduleImpact.AddOrUpdate(mfoSum);

                //    finalOverview.SaveAs(finalOverviewPath.add(".xml"));
                DataTable fover = finalOverview.GetDataTable(null, mfoSum.Crawler);

                fover.SetAggregationOriginCount(modC);
                fover.SetAggregationAspect(dataPointAggregationAspect.onTableMultiRow);
                fover.GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "fra_modules_impact_overview", true);


                record_moduleImpact.Save();
                var midt = record_moduleImpact.GetDataTable(null, "Module impacts");
                midt.AddExtra("The last benchmark metrics entry [" + imbWEMManager.index.experimentEntry.CrawlID + "] inserted on " + DateTime.Now.ToLongDateString() + " / " + DateTime.Now.ToLongTimeString());
                midt.GetReportAndSave(imbWEMManager.index.experimentEntry.sessionReportFolder, imbWEMManager.authorNotation, "fra_modules_impact".add(fileprefix, "_"));
            }
            else
            {
                dlc_c = tRecord.children.Count();
            }

            if (iterationTimelines.Any())
            {
                DataTable crawlTimeline = iterationTimelines.GetAggregatedTable("Crawler_Timeline", dataPointAggregationAspect.overlapMultiTable);
                crawlTimeline.SetDescription("Iteration-synced aggregated performance timeline built from the DLC records of [" + wRecords.Count + "] domains.");
                crawlTimeline.GetReportAndSave(imbWEMManager.index.experimentEntry.sessionCrawlerFolder, imbWEMManager.authorNotation, "timeline_performance_".add(imbWEMManager.index.experimentEntry.Crawler));
            }
            //String atl = "timeline_performance".add(tRecord.instance.name, "_").add("xml", ".");

            var domainPerfList = tRecord.lastDomainIterationTable.GetList();

            var benchmark = new reportPlugIn_benchmarkResults(); //records.GetOrCreate(imbWEMManager.index.experimentEntry.TestID);

            tRecord.performance.SetTestIDAndSignature(tRecord.instance, imbWEMManager.index.experimentEntry.state, tRecord);

            tRecord.performance.jobTimeInMinutes = tRecord.cpuTaker.GetTimeSpanInMinutes();

            record_performances.AddOrUpdate(tRecord.performance);

            benchmark.SetTestIDAndSignature(tRecord.instance, imbWEMManager.index.experimentEntry.state, tRecord);


            benchmark.CrawlTime = tRecord.cpuTaker.GetTimeSpanInMinutes(); //tRecord.cpuTaker.GetTimeSpan().TotalMinutes; //.timeFinish.Subtract(tRecord.timeStart).TotalMinutes;


            benchmark.IP           = domainPerfList.Average(x => x.IP);
            benchmark.IPnominal    = domainPerfList.Average(x => x.IPnominal);
            benchmark.IP_collected = domainPerfList.Average(x => x.IP_collected);
            benchmark.Lm_collected = domainPerfList.Average(x => x.Lm_collected);
            benchmark.Lm_recall    = domainPerfList.Average(x => x.Lm_recall);
            benchmark.E_PP         = domainPerfList.Average(x => x.E_PP);
            benchmark.E_TP         = domainPerfList.Average(x => x.E_TP);
            benchmark.IP_recall    = domainPerfList.Average(x => x.IP_recall);
            benchmark.Page_recall  = domainPerfList.Average(x => x.Page_recall);
            benchmark.Term_recall  = domainPerfList.Average(x => x.Term_recall);

            var resourcesamples = tRecord.measureTaker.GetLastSamples(1000);
            var lastsample      = tRecord.measureTaker.GetLastTake();

            benchmark.DataLoad = lastsample.bytesLoadedTotal / benchmark.CrawlTime;
            benchmark.CPU      = resourcesamples.Average(x => x.cpuRateOfProcess);
            benchmark.RAM      = resourcesamples.Average(x => x.physicalMemory);

            records.AddOrUpdate(benchmark);
            records.Save();

            var dt = records.GetDataTable(null, imbWEMManager.index.experimentEntry.CrawlID);

            dt.AddExtra("The last benchmark metrics entry [" + benchmark.Crawler + "] inserted on " + DateTime.Now.ToLongDateString() + " / " + DateTime.Now.ToLongTimeString());

            dt.SetAdditionalInfoEntry("DLC Threads - TC", __machine.maxThreads);
            dt.SetAdditionalInfoEntry("LoadTake - LT", tRecord.instance.settings.limitIterationNewLinks);
            dt.SetAdditionalInfoEntry("PageLoads - PL", tRecord.instance.settings.limitTotalPageLoad);
            dt.SetAdditionalInfoEntry("Sample size - DC", dlc_c);
            dt.SetAdditionalInfoEntry("Session ID", imbWEMManager.index.experimentEntry.SessionID);



            dt.GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "result", true);

            benchmark.GetUserManualSaved(crawlerReportFolder.pathFor("crawler\\result.txt"));

            //  crawlTimeline.saveObjectToXML(homeFolder.pathFor(atl));
            //  crawlTimeline.saveObjectToXML(reportFolder.pathFor(atl));

            // all three modules summary

            imbWEMManager.settings.directReportEngine.GetUserManualSaved(crawlerReportFolder["crawler"].pathFor("settings_reportEngine.txt"));
            imbWEMManager.settings.crawlerJobEngine.GetUserManualSaved(crawlerReportFolder["crawler"].pathFor("settings_crawlJobEngine.txt"));
            imbWEMManager.settings.executionLog.GetUserManualSaved(crawlerReportFolder["crawler"].pathFor("settings_executionLogs.txt"));

            tRecord.instance.settings.GetUserManualSaved(crawlerReportFolder["crawler"].pathFor("settings_crawler.txt"));
            record_performances.Save();
            var perfDT = record_performances.GetDataTable(null, imbWEMManager.index.experimentEntry.CrawlID);

            perfDT.AddExtra("The last benchmark metrics entry [" + benchmark.Crawler + "] inserted on " + DateTime.Now.ToLongDateString() + " / " + DateTime.Now.ToLongTimeString());

            perfDT.GetReportAndSave(imbWEMManager.index.experimentEntry.sessionReportFolder, imbWEMManager.authorNotation, "crawl_performances", true);
        }
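
Stripped of reporting plumbing, the benchmark record is built from per-domain averages plus a bytes-per-minute load rate. A sketch with hypothetical stand-in records (only two of the averaged metrics are shown):

        using System.Collections.Generic;
        using System.Linq;

        public record DomainPerf(double IP, double Lm_recall);
        public record ResourceSample(double CpuRateOfProcess, double PhysicalMemory, long BytesLoadedTotal);

        public static class BenchmarkSketch
        {
            // Averages per-domain metrics and resource samples; DataLoad is the total
            // bytes of the last sample divided by the crawl time in minutes.
            public static (double ip, double lmRecall, double cpu, double ram, double dataLoad) Summarize(
                IReadOnlyList<DomainPerf> perf, IReadOnlyList<ResourceSample> samples, double crawlTimeMinutes)
            {
                return (perf.Average(x => x.IP),
                        perf.Average(x => x.Lm_recall),
                        samples.Average(x => x.CpuRateOfProcess),
                        samples.Average(x => x.PhysicalMemory),
                        samples[samples.Count - 1].BytesLoadedTotal / crawlTimeMinutes);
            }
        }
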
Example #5
        /// <summary>
        /// Called after all modules have run, at the end of the FRA (frontier ranking algorithm) pass
        /// </summary>
        /// <param name="__wRecord">The w record.</param>
        /// <param name="entry">The entry.</param>
        /// <param name="output">The output.</param>
        /// <returns></returns>
        public frontierRankingAlgorithmIterationRecord reportEndOfFRA(modelSpiderSiteRecord __wRecord, frontierRankingAlgorithmIterationRecord entry, spiderModuleData <spiderLink> output)
        {
            entry.output = output.active.Count;

            if (entry.inputTargets > entry.output)
            {
                entry.accumulation = entry.inputTargets - entry.output;
            }
            else
            {
                entry.drain = entry.output - entry.inputTargets;
            }

            Dictionary <string, spiderLink> urls = new Dictionary <string, spiderLink>();


            foreach (var pair in output.active)
            {
                urls[pair.url] = pair; // indexer instead of Add: tolerates duplicate URLs in the active set
            }

            var assertion = imbWEMManager.index.pageIndexTable.GetUrlAssertion(urls.Keys);

            entry.outputPotentialPrecission = assertion.relevant; // [indexPageEvaluationEntryState.isRelevant].Count.GetRatio(assertion[indexPageEvaluationEntryState.haveEvaluationEntry].Count);

            assertion.performInfoGainEstimation(entry.PLleft);
            entry.PotOutputIP = assertion.IPnominal;

            entry.PotChangeIP = entry.PotOutputIP - entry.PotInputIP;

            entry.potentialPrecissionChange = entry.outputPotentialPrecission - entry.inputPotentialPrecission;



            entry.moduleUse = 0;

            foreach (var modPair in modRecords)
            {
                moduleIterationRecord moduleReport = modPair.Value.GetFirstWhere(nameof(moduleIterationRecord.iteration) + " = " + entry.iteration);
                if (moduleReport != null)
                {
                    entry.moduleUse++;

                    if (modPair.Key == typeof(languageModule).Name)
                    {
                        entry.accumulatedLanguage = moduleReport.accumulated;
                    }
                    else if (modPair.Key == typeof(structureModule).Name)
                    {
                        entry.accumulatedStructure = moduleReport.accumulated;
                    }
                    else if (modPair.Key == typeof(templateModule).Name)
                    {
                        entry.accumulatedTemplate = moduleReport.accumulated;
                    }
                }
            }



            entry.duration = DateTime.Now.Subtract(entry.start).TotalSeconds;


            generalRecords.AddOrUpdate(entry);

            return entry;
        }
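
The accumulation/drain bookkeeping at the top of the method reduces to one comparison. A minimal sketch:

        public static class FrontierFlowSketch
        {
            // When a module emits fewer links than it received, the difference accumulates;
            // when it emits more, the frontier drains.
            public static (int accumulation, int drain) Balance(int inputTargets, int output)
            {
                return inputTargets > output
                    ? (inputTargets - output, 0)
                    : (0, output - inputTargets);
            }
        }
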
Example #6
        public override void eventAtInitiationOfCrawlJob(crawlerDomainTaskMachine __machine, modelSpiderTestRecord tRecord)
        {
            imbWEMManager.index.domainIndexTable.recheck(imbWEMManager.index.pageIndexTable, output);

            //reportFolder = imbWEMManager.index.experimentEntry.sessionReportFolder;


            //String recordName = imbWEMManager.index.experimentEntry.SessionID.getFilename();

            string path = imbWEMManager.index.experimentEntry.sessionReportFolder.pathFor(recordFileName);

            records             = new objectTable <indexDomain>(path, true, __recordKeyProperty, name);
            records.description = "Side index";


            var           domains = records.GetList();
            List <string> __url   = new List <string>(); // http://www.
            Dictionary <string, indexDomain> dict = new Dictionary <string, indexDomain>();

            domains.ForEach(x => __url.Add(x.url));
            domains.ForEach(x => dict.Add(x.url, x));


            int dc_ik = 0;
            List <crawlerDomainTask> tasks = new List <crawlerDomainTask>();

            foreach (var task in __machine.items.items)
            {
                if (__url.Contains(task.wRecord.instanceID)) // wRecord.instanceID = http://www.
                {
                    task.status = crawlerDomainTaskStatusEnum.aborted;
                    tasks.Add(task);
                }
                else
                {
                    if (imbWEMManager.settings.supportEngine.reportPlugIn_sideIndexer_UseIfPagesKnown)
                    {
                        indexDomain iDomainFromIndex = imbWEMManager.index.domainIndexTable.GetOrCreate(task.wRecord.instanceID);

                        records.AddOrUpdate(iDomainFromIndex, objectTableUpdatePolicy.updateIfHigher);

                        if (dict.ContainsKey(task.wRecord.instanceID))
                        {
                            indexDomain iDomain = dict[task.wRecord.instanceID];
                            if ((iDomain.relevantPages + iDomain.notRelevantPages) >= tRecord.instance.settings.limitTotalPageLoad)
                            {
                                dc_ik++;
                                tasks.Add(task);
                            }
                        }
                    }
                }
            }

            int dc = 0;

            // dequeue one scheduled task per flagged task
            foreach (var task in tasks)
            {
                crawlerDomainTask t_out = null;
                if (__machine.items.items.TryDequeue(out t_out))
                {
                    dc++;
                }
            }

            aceLog.consoleControl.setAsOutput(output, "SideIndex");
            if (dc > 0)
            {
                output.log("DLCs processed in an earlier session: " + dc);
            }
            if (dc_ik > 0)
            {
                output.log("DLCs removed from schedule because the index has already enough pages loaded: " + dc_ik);
            }
        }
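
The scheduling decision above can be summarized in one predicate. A hedged sketch with a hypothetical DomainIndexEntry stand-in for indexDomain:

        using System.Collections.Generic;

        public record DomainIndexEntry(int RelevantPages, int NotRelevantPages);

        public static class SideIndexSketch
        {
            // A crawl task is dropped when its domain was finished in an earlier session,
            // or when the side index already holds at least the page-load limit for it.
            public static bool ShouldSkip(string domainUrl,
                                          ISet<string> finishedDomains,
                                          IReadOnlyDictionary<string, DomainIndexEntry> index,
                                          int pageLoadLimit)
            {
                if (finishedDomains.Contains(domainUrl))
                {
                    return true; // aborted and dequeued in the method above
                }
                return index.TryGetValue(domainUrl, out DomainIndexEntry entry)
                       && (entry.RelevantPages + entry.NotRelevantPages) >= pageLoadLimit;
            }
        }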