Example #1
        /// <summary>Recalculates crawl time by importing the exported dt_dataLoad table of each crawl and updating the performance exports</summary>
        /// <remarks><para>Loads the results records of the open session to find all crawls, then imports each crawler's dt_dataLoad export and sums its sampling periods</para></remarks>
        /// <seealso cref="aceOperationSetExecutorBase"/>
        public void aceOperation_repairRecoverTime()
        {
            var __recordKeyProperty = "TestID";
            var homeFolder          = new folderNode("index\\benchmark", "Home folder of plugin: benchmark", "Internal data for plugin: benchmark");
            var recordName          = imbWEMManager.index.experimentEntry.SessionID;

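            // Open the session-scoped benchmark tables: summary results, per-crawler performances and module impact overview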
            var records = new objectTable <reportPlugIn_benchmarkResults>(homeFolder.pathFor(recordName.add("results", "_")), true, __recordKeyProperty, name);

            records.description = "Summary report on the most relevant evaluation metrics.";

            var record_performances = new objectTable <performanceRecord>(homeFolder.pathFor(recordName.add("performances", "_")), true, "TestID", name);

            var record_moduleImpact = new objectTable <moduleFinalOverview>(homeFolder.pathFor(recordName.add("modules", "_")), true, "ModuleName", name);

            // <---- making crawler list
            List <string> crawlerList = new List <string>();

            List <reportPlugIn_benchmarkResults> allRecords = records.GetList();
            var reportFolder = imbWEMManager.index.experimentEntry.sessionReportFolder;

            Dictionary <string, string> pathsForResultExcel = new Dictionary <string, string>();

            Dictionary <string, folderNode> foldersForResultExcel = new Dictionary <string, folderNode>();


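            // First pass: load each crawler's dt_dataLoad CSV export and recompute the total crawl time from its sampling periods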
            foreach (reportPlugIn_benchmarkResults result in allRecords)
            {
                crawlerList.Add(result.Crawler);
                output.AppendLine("Crawl found: " + result.Crawler);

                string pathCrawlerId = result.Crawler.Replace("-", "");

                folderNode resultNode = reportFolder[pathCrawlerId.ToUpper() + "\\crawler\\data"];

                string pathForData = resultNode.pathFor("dc_dataload_" + result.Crawler.ToLower() + ".csv");
                //String pathForResult = reportFolder.pathFor(pathCrawlerId);

                foldersForResultExcel.Add(result.Crawler, resultNode);

                //foldersForResultExcel.Add(result.Crawler, pathForResult);


                // String path = reportFolder.pathFor(pathForData, getWritableFileMode.existing);
                output.AppendLine("Loading datatable: " + pathForData);

                DataTable dataTable = pathForData.deserializeDataTable(dataTableExportEnum.csv);
                output.AppendLine("DataTable loaded - rows[" + dataTable.Rows.Count + "]");
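                // Sum the sampling periods (in seconds) from the Period column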
                DataColumn periodColumn = dataTable.Columns["Period"];
                double     periodSum    = 0;
                foreach (DataRow dr in dataTable.Rows)
                {
                    string read  = dr[periodColumn].ToString().Replace(",", ".");
                    double readD = double.Parse(read, System.Globalization.CultureInfo.InvariantCulture);
                    periodSum += readD;
                }

                output.AppendLine("Total execution time in seconds: " + periodSum.ToString("F5"));

                result.CrawlTime = periodSum / 60d; // convert seconds to minutes
                records.AddOrUpdate(result);
            }

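            // Second pass: save the repaired results table into each crawler's report data folder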
            foreach (reportPlugIn_benchmarkResults result in allRecords)
            {
                folderNode resultFolder = foldersForResultExcel[result.Crawler];

                records.GetDataTable().GetReportAndSave(resultFolder, imbWEMManager.authorNotation, "results_timefix", true);

                output.AppendLine("Repaired result table saved to: " + resultFolder.path);
                // <---- fixing crawler results
            }
        }
Example #2
        public override void eventAtInitiationOfCrawlJob(crawlerDomainTaskMachine __machine, modelSpiderTestRecord tRecord)
        {
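            // Recheck the domain index against the page index before the crawl job starts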
            imbWEMManager.index.domainIndexTable.recheck(imbWEMManager.index.pageIndexTable, output);

            //reportFolder = imbWEMManager.index.experimentEntry.sessionReportFolder;


            //String recordName = imbWEMManager.index.experimentEntry.SessionID.getFilename();

            string path = imbWEMManager.index.experimentEntry.sessionReportFolder.pathFor(recordFileName);

            records             = new objectTable <indexDomain>(path, true, __recordKeyProperty, name);
            records.description = "Side index";


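            // Load the side-index records and build URL lookups for the domains already known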
            var           domains = records.GetList();
            List <string> __url   = new List <string>(); // domain URLs in the "http://www." form
            Dictionary <string, indexDomain> dict = new Dictionary <string, indexDomain>();

            domains.ForEach(x => __url.Add(x.url));
            domains.ForEach(x => dict.Add(x.url, x));


            int dc_ik = 0;
            List <crawlerDomainTask> tasks = new List <crawlerDomainTask>();

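            // Mark for removal: tasks whose domain was already handled in an earlier session, and (optionally) domains whose index already holds enough loaded pages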
            foreach (var task in __machine.items.items)
            {
                if (__url.Contains(task.wRecord.instanceID)) // wRecord.instanceID is the domain URL in the "http://www." form
                {
                    task.status = crawlerDomainTaskStatusEnum.aborted;
                    tasks.Add(task);
                }
                else
                {
                    if (imbWEMManager.settings.supportEngine.reportPlugIn_sideIndexer_UseIfPagesKnown)
                    {
                        indexDomain iDomainFromIndex = imbWEMManager.index.domainIndexTable.GetOrCreate(task.wRecord.instanceID);

                        records.AddOrUpdate(iDomainFromIndex, objectTableUpdatePolicy.updateIfHigher);

                        if (dict.ContainsKey(task.wRecord.instanceID))
                        {
                            indexDomain iDomain = dict[task.wRecord.instanceID];
                            if ((iDomain.relevantPages + iDomain.notRelevantPages) >= tRecord.instance.settings.limitTotalPageLoad)
                            {
                                dc_ik++;
                                tasks.Add(task);
                            }
                        }
                    }
                }
            }

            int dc = 0;

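            // Dequeue one item from the machine's task queue for every marked task, counting how many were removed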
            foreach (var task in tasks)
            {
                crawlerDomainTask t_out = null;
                if (__machine.items.items.TryDequeue(out t_out))
                {
                    dc++;
                }
            }

            aceLog.consoleControl.setAsOutput(output, "SideIndex");
            if (dc > 0)
            {
                output.log("DLCs processed in an earlier session: " + dc);
            }
            if (dc_ik > 0)
            {
                output.log("DLCs removed from schedule because the index has already enough pages loaded: " + dc_ik);
            }
        }