Example #1
0
        /// <summary>
        /// Creates the crawl-model record for a single site: resolves domain information
        /// from the crawler instance's seed link and prepares the per-iteration
        /// performance table for this domain.
        /// </summary>
        /// <param name="__testRunStamp">Identifier/time stamp of the test run (forwarded to base)</param>
        /// <param name="__instance">Crawler instance whose seed link defines the target domain (forwarded to base)</param>
        public modelSpiderSiteRecord(string __testRunStamp, spiderWeb __instance) : base(__testRunStamp, __instance)
        {
            domainInfo = new domainAnalysis(instance.seedLink.url);

            // dots in the domain name are replaced so the table name stays a clean identifier
            iterationTableRecord = new objectTable <iterationPerformanceRecord>("key", "iteration_" + domainInfo.domainName.Replace(".", "_"));
        }
Example #2
0
        /// <summary>
        /// Creates a named performance log: stores the name, builds the takes table
        /// (keyed by <see cref="IPerformanceTake.idk"/>, named after the cleaned log name)
        /// and invokes <see cref="prepare"/> for subclass initialization.
        /// </summary>
        /// <param name="__name">Name of this performance log; its cleaned form becomes the takes table name</param>
        protected performanceBase(String __name)
        {
            name = __name;

            takes = new objectTable <T>(nameof(IPerformanceTake.idk), name.getCleanPropertyName());

            prepare();
        }
        /// <summary>
        /// Called once the repository folder is available: opens the access log in
        /// append mode, notes the access, and loads/creates the site index table.
        /// </summary>
        public override void OnLoaded()
        {
            // append mode preserves log entries from earlier sessions
            loger = new builderForLog(folder.pathFor("log.txt"), true, getWritableFileMode.appendFile);
            loger.log("Repository [" + name + "] accessed");

            // index table keyed by domain, persisted as siteTable.xml in the repo folder
            siteTable             = new objectTable <imbMCWebSiteEntry>(folder.pathFor("siteTable.xml"), true, nameof(imbMCWebSiteEntry.domain), "siteTable");
            siteTable.description = "Index datatable with all stored MCWebSite repo-entries";
        }
Example #4
0
        /// <summary>
        /// Loads all experiment data located next to the given experimentSetup.xml file:
        /// the report summary, the experiment setup, and the top-performers table.
        /// </summary>
        /// <param name="pathOfExperimentSetupXML">Path pointing to experimentSetup.xml file</param>
        /// <param name="logger">The logger.</param>
        /// <param name="_folderRoot">Root folder node assigned to <c>folderRoot</c> before loading</param>
        public void Load(String pathOfExperimentSetupXML, ILogBuilder logger, folderNode _folderRoot)
        {
            folderRoot = _folderRoot;
            folder     = new DirectoryInfo(Path.GetDirectoryName(pathOfExperimentSetupXML));

            report     = objectSerialization.loadObjectFromXML <experimentReport>(folder.pathFor("ReportSummary.xml"), logger);
            experiment = objectSerialization.loadObjectFromXML <experimentSetup>(folder.pathFor("experimentSetup.xml"), logger);
            //topperformers = report.bestPerformingClassifiers;
            // top performers are loaded from their own XML table rather than taken from the report object
            topperformers = new objectTable <DocumentSetCaseCollectionReport>(folder.pathFor("TopPerformers.xml"), true, nameof(DocumentSetCaseCollectionReport.Name), "TopPerformers");
        }
        //public void ExportTextReport<T>(StringBuilder sb) T where
        //{
        //    var l = GetAllContainers();

        //    l.Sort((x, y) => -x.totalFrequency.CompareTo(y.totalFrequency));

        //    StringBuilder sb = new StringBuilder();
        //    Int32 i = 1;
        //    foreach (TFDFContainer container in TFDF)
        //    {
        //        pipelineTaskTFDFContentSubject s = new pipelineTaskTFDFContentSubject(container);
        //        pipelineTask<pipelineTaskTFDFContentSubject> task = new pipelineTask<pipelineTaskTFDFContentSubject>(s);
        //        sb.AppendLine(String.Format("[" + i.ToString("D5") + "] {0,-40} : DF {1,-5} - TF {2,-5} ", s.tfdf.indexForm, s.tfdf.documentFrequency, s.tfdf.totalFrequency));
        //        i++;
        //        sTasks.Add(task);
        //    }
        //}

        /// <summary>
        /// Builds an object table holding every TFDF container currently indexed.
        /// </summary>
        /// <returns>Table keyed by <see cref="TFDFContainer.indexForm"/>, one row per container</returns>
        public objectTable <TFDFContainer> GetTableWithContainers()
        {
            var table = new objectTable <TFDFContainer>(nameof(TFDFContainer.indexForm), nameof(TFDFContainer));

            foreach (var container in items.Values)
            {
                table.Add(container);
            }

            return table;
        }
Example #6
0
        /// <summary>
        /// Finalizes the experiment report: selects the best model (highest S1 measure)
        /// and the best-performing classifier (highest F1 measure), computes the total
        /// duration, and - when a target folder is supplied - saves the text summary,
        /// the serialized report, the best model and the TopPerformers table.
        /// </summary>
        /// <param name="folderToSaveInto">Target folder for report artifacts; pass null to skip saving</param>
        public void CloseReport(folderNode folderToSaveInto)
        {
            // pick the model with the highest S1 measure
            foreach (var cl in modelMetrics)
            {
                if (cl.S1Measure > S1Measure)
                {
                    S1Measure = cl.S1Measure;
                    bestModel = cl;
                }
            }

            // pick the classifier with the highest F1 measure
            foreach (var cl in bestPerformingClassifiers)
            {
                if (cl.F1measure > BestF1)
                {
                    theBestPerformer = cl;
                }
                BestF1 = Math.Max(BestF1, cl.F1measure);
            }

            Duration = DateTime.Now.Subtract(start).TotalMinutes;

            if (folderToSaveInto != null)
            {
                var ds = DescribeSelf();
                var p  = folderToSaveInto.pathFor("ReportSummary.txt", imbSCI.Data.enums.getWritableFileMode.none, "Short summary on the results of the experiment");
                File.WriteAllLines(p, ds);

                objectSerialization.saveObjectToXML(this, folderToSaveInto.pathFor("ReportSummary.xml", imbSCI.Data.enums.getWritableFileMode.none, "XML Serialized ReportSummary object - for automatic multi-experiment results processing"));

                objectTable <DocumentSetCaseCollectionReport> tp = new objectTable <DocumentSetCaseCollectionReport>(folderToSaveInto.pathFor("TopPerformers.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "XML Serialized object - with best performing FVE models - for later results post-reporting"), false, nameof(DocumentSetCaseCollectionReport.Name), "TopPerformers");

                // FIX: bestModel stays null when modelMetrics is empty (or no S1 exceeds the current value);
                // previously this dereferenced null and crashed the report
                if (bestModel != null)
                {
                    var mp = folderToSaveInto.pathFor("ModelMetrics.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "The FVE with highest S1 score, for this experiment");
                    bestModel.saveObjectToXML(mp);
                }

                tp.AddRange(bestPerformingClassifiers);
                DataTable dt = tp.GetDataTable();
                context.AddExperimentInfo(dt);

                // FIX: theBestPerformer stays null when bestPerformingClassifiers is empty - guard the highlight
                if (theBestPerformer != null)
                {
                    dt.GetRowMetaSet().SetStyleForRowsWithValue <String>(DataRowInReportTypeEnum.dataHighlightA, "Name", theBestPerformer.Name);
                }

                dt.SetDescription("Overview table with the best performing FVE - vs - Classifier pairs of the experiment [" + experiment.name + "]");
                dt.GetReportAndSave(folderToSaveInto, appManager.AppInfo, "TopPerformers", true, context.tools.operation.doReportsInParalell);
                tp.Save(imbSCI.Data.enums.getWritableFileMode.overwrite);
            }
        }
Example #7
0
        /// <summary>
        /// Sets up the benchmark report tables when the plugin is installed: the results
        /// summary, the per-crawl performance table and the module-impact table, all
        /// stored in the plugin home folder and named after the current session.
        /// </summary>
        public override void eventPluginInstalled()
        {
            reportFolder = imbWEMManager.index.experimentEntry.sessionReportFolder;

            string sessionFileName = imbWEMManager.index.experimentEntry.SessionID.getFilename();

            records             = new objectTable <reportPlugIn_benchmarkResults>(homeFolder.pathFor(sessionFileName.add("results", "_")), true, __recordKeyProperty, name);
            records.description = "Summary report on the most relevant evaluation metrics.";

            record_performances = new objectTable <performanceRecord>(homeFolder.pathFor(sessionFileName.add("performances", "_")), true, "TestID", name);

            record_moduleImpact = new objectTable <moduleFinalOverview>(homeFolder.pathFor(sessionFileName.add("modules", "_")), true, "ModuleName", name);
        }
Example #8
0
        /// <summary>
        /// Builds a DataTable with the per-classifier average reports of this fold.
        /// </summary>
        /// <param name="context">Experiment execution context (annotation is taken from the parent validation case)</param>
        /// <returns>Annotated table containing only the averaged entries</returns>
        public DataTable GetAverageTable(experimentExecutionContext context)
        {
            var averages = new objectTable <DocumentSetCaseCollectionReport>(nameof(DocumentSetCaseCollectionReport.Name), parent.validationCase.name + "_avg");

            foreach (var entry in avgReports)
            {
                averages.Add(entry.Value);
            }

            DataTable result = averages.GetDataTable();
            String    fold   = parent.validationCase.name;

            parent.validationCase.context.AddExperimentInfo(result);
            result.SetDescription($"Aggregates of [{fold}] evaluation - only averages");
            result.SetAdditionalInfoEntry("Report type", "Average per classifier");

            return result;
        }
Example #9
0
        /// <summary>
        /// Initializes the workload plugin when a crawl job starts: loads settings,
        /// prepares plugin state, opens the workload entry table, and - when step-up
        /// mode is enabled - sets the machine's initial thread count.
        /// </summary>
        /// <param name="_machine">Crawler task machine whose thread limit may be adjusted</param>
        /// <param name="tRecord">Test record of the crawl being started</param>
        public override void eventAtInitiationOfCrawlJob(crawlerDomainTaskMachine _machine, modelSpiderTestRecord tRecord)
        {
            plugin_settings = imbWEMManager.settings.supportEngine.plugIn_workload_settings;
            plugin_state.statePrepare(plugin_settings);

            // the step-up start/step values are baked into the record file name
            string suffix = plugin_settings.stepUp_start.ToString() + plugin_settings.stepUp_step.ToString();

            __recordPath = homeFolder.pathFor("results_" + tRecord.instance.name + suffix + ".xml");

            records = new objectTable <reportPlugIn_workloadEntry>(recordPath, false, __recordKeyProperty, plugin_state.TestID);

            if (!plugin_settings.stepUp_enabled)
            {
                plugin_state.pluginState = workloadPluginState.disabled;
            }
            else
            {
                plugin_state.pluginState = workloadPluginState.preparing;
                _machine.maxThreads      = plugin_settings.stepUp_start;
            }
        }
Example #10
0
        /// <summary>
        /// Builds a DataTable with every evaluation entry of this fold (all groups, all entries).
        /// </summary>
        /// <param name="context">Experiment execution context (annotation is taken from the parent validation case)</param>
        /// <returns>Annotated table containing all entries</returns>
        public DataTable GetFullValidationTable(experimentExecutionContext context)
        {
            var allEntries = new objectTable <DocumentSetCaseCollectionReport>(nameof(DocumentSetCaseCollectionReport.Name), parent.validationCase.name + "_avg");

            foreach (var pair in this)
            {
                foreach (var report in pair.Value)
                {
                    allEntries.Add(report);
                }
            }

            DataTable result = allEntries.GetDataTable();
            String    fold   = parent.validationCase.name;

            parent.validationCase.context.AddExperimentInfo(result);
            result.SetDescription($"Results of fold [{fold}] evaluation, with all entries");
            result.SetAdditionalInfoEntry("Report type", "All entries");
            result.AddExtra("Most relevant rows are annoted with [(mean)] word");

            return result;
        }
Example #11
0
 /// <summary>
 /// Creates an empty DLC record with its general-records table keyed by "name".
 /// NOTE(review): the table name uses <c>name</c> before any deployment - presumably
 /// the type's default value at this point; confirm against the base class.
 /// </summary>
 public frontierRankingAlgorithmDLCRecord()
 {
     generalRecords = new objectTable <frontierRankingAlgorithmIterationRecord>("name", name);
 }
Example #12
0
 /// <summary>
 /// Creates a performance log named after the take type <typeparamref name="T"/>:
 /// builds the takes table (keyed by <see cref="IPerformanceTake.idk"/>) and calls
 /// <see cref="prepare"/> for subclass initialization.
 /// </summary>
 protected performanceBase()
 {
     takes = new objectTable <T>(nameof(IPerformanceTake.idk), typeof(T).Name.getCleanPropertyName());
     prepare();
 }
Example #13
0
        /// <summary>
        /// Final reporting hook of the benchmark plugin, run when a crawl job completes:
        /// aggregates per-domain iteration timelines, optionally builds frontier-ranking
        /// module (FRA) reports and module-impact overviews, then computes averaged
        /// benchmark metrics over all domains (DLCs) and saves the result tables.
        /// </summary>
        /// <param name="__machine">Task machine that executed the crawl job</param>
        /// <param name="tRecord">Test record of the finished crawl</param>
        public override void eventCrawlJobFinished(crawlerDomainTaskMachine __machine, modelSpiderTestRecord tRecord)
        {
            crawlerReportFolder = __machine.reporter.folder;

            var    wRecords   = tRecord.GetChildRecords();
            string fileprefix = tRecord.instance.name.getFilename();

            // one iteration-performance DataTable per crawled domain (DLC)
            List <DataTable> iterationTimelines = new List <DataTable>();

            foreach (var wRecord in wRecords)
            {
                iterationTimelines.Add(wRecord.iterationTableRecord.GetDataTable());
            }
            int dlc_c = 0; // number of DLCs (domains); counted below or taken from children

            if (imbWEMManager.settings.directReportEngine.DR_ReportModules)
            {
                // --- FRA module reports: one aggregated table per module type ---
                tRecord.frontierDLCDataTables[moduleIterationRecordSummary.fra_overview].GetAggregatedTable("fra_overview").GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "fra_overview".add(fileprefix, "_"), true);

                tRecord.frontierDLCDataTables[moduleIterationRecordSummary.all].GetAggregatedTable("fra_modules_all").GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "fra_modules_all".add(fileprefix, "_"), true);

                // NOTE(review): the table/file names below mix "fra_module_"/"fra_modules_" and
                // trailing-underscore styles - presumably historical; confirm before normalizing
                if (tRecord.frontierDLCDataTables[moduleIterationRecordSummary.language].Any())
                {
                    tRecord.frontierDLCDataTables[moduleIterationRecordSummary.language].GetAggregatedTable("fra_module_language").GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "fra_module_language_".add(fileprefix, "_"), true);
                }
                if (tRecord.frontierDLCDataTables[moduleIterationRecordSummary.structure].Any())
                {
                    tRecord.frontierDLCDataTables[moduleIterationRecordSummary.structure].GetAggregatedTable("fra_modules_structure").GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "fra_module_structure_".add(fileprefix, "_"), true);
                }
                if (tRecord.frontierDLCDataTables[moduleIterationRecordSummary.template].Any())
                {
                    tRecord.frontierDLCDataTables[moduleIterationRecordSummary.template].GetAggregatedTable("fra_modules_template").GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "fra_module_template".add(fileprefix, "_"), true);
                }
                if (tRecord.frontierDLCDataTables[moduleIterationRecordSummary.diversity].Any())
                {
                    tRecord.frontierDLCDataTables[moduleIterationRecordSummary.diversity].GetAggregatedTable("fra_module_diversity").GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "fra_module_diversity_".add(fileprefix, "_"), true);
                }

                // --- module impact overview: aggregate module iteration records over all DLCs ---
                string finalOverviewPath = crawlerReportFolder.pathFor("fra_modules_impact".add(fileprefix, "_"), getWritableFileMode.newOrExisting);
                objectTable <moduleFinalOverview> finalOverview = new objectTable <moduleFinalOverview>(finalOverviewPath, false, "ModuleName", "module_impact");
                finalOverview.description = "Aggregate (DLC and iterations) metrics on modules' impact to the result.";

                aceDictionarySet <moduleIterationRecordSummary, moduleIterationRecord> moduleIterationsByModule = new aceDictionarySet <moduleIterationRecordSummary, moduleIterationRecord>();
                List <moduleIterationRecordSummary> moduleActive = new List <moduleIterationRecordSummary>();

                // collect iteration records per module type; track which modules were active
                foreach (var wRecord in wRecords)
                {
                    dlc_c++;
                    foreach (var pair in wRecord.frontierDLC.modRecords)
                    {
                        moduleIterationsByModule.Add(pair.Value.moduleSummaryEnum, pair.Value.GetList());
                        if (!moduleActive.Contains(pair.Value.moduleSummaryEnum))
                        {
                            moduleActive.Add(pair.Value.moduleSummaryEnum);
                        }
                    }
                }

                int modC = 0; // total number of module iteration records aggregated
                List <moduleFinalOverview> modList = new List <moduleFinalOverview>();
                foreach (var modType in moduleActive)
                {
                    moduleFinalOverview mfo = new moduleFinalOverview();
                    mfo.deploy(tRecord.instance.name, modType, moduleIterationsByModule[modType], dlc_c);
                    modC += moduleIterationsByModule[modType].Count;
                    finalOverview.AddOrUpdate(mfo);
                    modList.Add(mfo);
                }

                // summary row over all modules
                moduleFinalOverview mfoSum = new moduleFinalOverview();


                mfoSum.deploySum(tRecord.instance.name, modList);
                finalOverview.AddOrUpdate(mfoSum);

                // stamp each module overview with the test signature and persist to the session-wide table
                foreach (var mfo in modList)
                {
                    mfo.SetTestIDAndSignature(tRecord.instance, imbWEMManager.index.experimentEntry.state, tRecord);
                    finalOverview.AddOrUpdate(mfo);

                    record_moduleImpact.AddOrUpdate(mfo);
                }


                mfoSum.SetTestIDAndSignature(tRecord.instance, imbWEMManager.index.experimentEntry.state, tRecord);
                record_moduleImpact.AddOrUpdate(mfoSum);

                //    finalOverview.SaveAs(finalOverviewPath.add(".xml"));
                DataTable fover = finalOverview.GetDataTable(null, mfoSum.Crawler);

                fover.SetAggregationOriginCount(modC);
                fover.SetAggregationAspect(dataPointAggregationAspect.onTableMultiRow);
                fover.GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "fra_modules_impact_overview", true);


                record_moduleImpact.Save();
                var midt = record_moduleImpact.GetDataTable(null, "Module impacts");
                midt.AddExtra("The last benchmark metrics entry [" + imbWEMManager.index.experimentEntry.CrawlID + "] inserted on " + DateTime.Now.ToLongDateString() + " / " + DateTime.Now.ToLongTimeString());
                midt.GetReportAndSave(imbWEMManager.index.experimentEntry.sessionReportFolder, imbWEMManager.authorNotation, "fra_modules_impact_".add(fileprefix, "_"));
            }
            else
            {
                // module reporting disabled: DLC count comes straight from the child records
                dlc_c = tRecord.children.Count();
            }

            // --- iteration-synced crawl timeline over all DLCs ---
            if (iterationTimelines.Any())
            {
                DataTable crawlTimeline = iterationTimelines.GetAggregatedTable("Crawler_Timeline", dataPointAggregationAspect.overlapMultiTable);
                crawlTimeline.SetDescription("Iteration-synced aggregated performance timeline using DLC records [" + wRecords.Count + "] domains.");
                crawlTimeline.GetReportAndSave(imbWEMManager.index.experimentEntry.sessionCrawlerFolder, imbWEMManager.authorNotation, "timeline_performance_".add(imbWEMManager.index.experimentEntry.Crawler));
            }
            //String atl = "timeline_performance".add(tRecord.instance.name, "_").add("xml", ".");

            var domainPerfList = tRecord.lastDomainIterationTable.GetList();

            var benchmark = new reportPlugIn_benchmarkResults(); //records.GetOrCreate(imbWEMManager.index.experimentEntry.TestID);

            tRecord.performance.SetTestIDAndSignature(tRecord.instance, imbWEMManager.index.experimentEntry.state, tRecord);

            tRecord.performance.jobTimeInMinutes = tRecord.cpuTaker.GetTimeSpanInMinutes();

            record_performances.AddOrUpdate(tRecord.performance);

            benchmark.SetTestIDAndSignature(tRecord.instance, imbWEMManager.index.experimentEntry.state, tRecord);


            benchmark.CrawlTime = tRecord.cpuTaker.GetTimeSpanInMinutes(); //tRecord.cpuTaker.GetTimeSpan().TotalMinutes; //.timeFinish.Subtract(tRecord.timeStart).TotalMinutes;


            // benchmark metrics: per-domain averages over the last iteration of each DLC
            benchmark.IP           = domainPerfList.Average(x => x.IP);
            benchmark.IPnominal    = domainPerfList.Average(x => x.IPnominal);
            benchmark.IP_collected = domainPerfList.Average(x => x.IP_collected);
            benchmark.Lm_collected = domainPerfList.Average(x => x.Lm_collected);
            benchmark.Lm_recall    = domainPerfList.Average(x => x.Lm_recall);
            benchmark.E_PP         = domainPerfList.Average(x => x.E_PP);
            benchmark.E_TP         = domainPerfList.Average(x => x.E_TP);
            benchmark.IP_recall    = domainPerfList.Average(x => x.IP_recall);
            benchmark.Page_recall  = domainPerfList.Average(x => x.Page_recall);
            benchmark.Term_recall  = domainPerfList.Average(x => x.Term_recall);

            // resource usage: averaged over the last up-to-1000 samples
            var resourcesamples = tRecord.measureTaker.GetLastSamples(1000);
            var lastsample      = tRecord.measureTaker.GetLastTake();

            benchmark.DataLoad = lastsample.bytesLoadedTotal / benchmark.CrawlTime;
            benchmark.CPU      = resourcesamples.Average(x => x.cpuRateOfProcess);
            benchmark.RAM      = resourcesamples.Average(x => x.physicalMemory);

            records.AddOrUpdate(benchmark);
            records.Save();

            var dt = records.GetDataTable(null, imbWEMManager.index.experimentEntry.CrawlID);

            dt.AddExtra("The last benchmark metrics entry [" + benchmark.Crawler + "] inserted on " + DateTime.Now.ToLongDateString() + " / " + DateTime.Now.ToLongTimeString());

            dt.SetAdditionalInfoEntry("DLC Threads - TC", __machine.maxThreads);
            dt.SetAdditionalInfoEntry("LoadTake - LT", tRecord.instance.settings.limitIterationNewLinks);
            dt.SetAdditionalInfoEntry("PageLoads - PL", tRecord.instance.settings.limitTotalPageLoad);
            dt.SetAdditionalInfoEntry("Sample size - DC", dlc_c);
            dt.SetAdditionalInfoEntry("Session ID", imbWEMManager.index.experimentEntry.SessionID);



            dt.GetReportAndSave(crawlerReportFolder, imbWEMManager.authorNotation, "result", true);

            benchmark.GetUserManualSaved(crawlerReportFolder.pathFor("crawler\\result.txt"));

            //  crawlTimeline.saveObjectToXML(homeFolder.pathFor(atl));
            //  crawlTimeline.saveObjectToXML(reportFolder.pathFor(atl));

            // all three modules summary

            // persist engine/crawler settings snapshots alongside the report
            imbWEMManager.settings.directReportEngine.GetUserManualSaved(crawlerReportFolder["crawler"].pathFor("settings_reportEngine.txt"));
            imbWEMManager.settings.crawlerJobEngine.GetUserManualSaved(crawlerReportFolder["crawler"].pathFor("settings_crawlJobEngine.txt"));
            imbWEMManager.settings.executionLog.GetUserManualSaved(crawlerReportFolder["crawler"].pathFor("settings_executionLogs.txt"));

            tRecord.instance.settings.GetUserManualSaved(crawlerReportFolder["crawler"].pathFor("settings_crawler.txt"));
            record_performances.Save();
            var perfDT = record_performances.GetDataTable(null, imbWEMManager.index.experimentEntry.CrawlID);

            perfDT.AddExtra("The last benchmark metrics entry [" + benchmark.Crawler + "] inserted on " + DateTime.Now.ToLongDateString() + " / " + DateTime.Now.ToLongTimeString());

            perfDT.GetReportAndSave(imbWEMManager.index.experimentEntry.sessionReportFolder, imbWEMManager.authorNotation, "crawl_performances", true);
        }
Example #14
0
 /// <summary>
 /// Creates a DLC record and immediately deploys it for the given site record.
 /// </summary>
 /// <param name="wRecord">Site record this DLC record describes</param>
 public frontierRankingAlgorithmDLCRecord(modelSpiderSiteRecord wRecord) //  string __dataSetName, string __dataSetDescription, folderNode __fileFolder = null) : base(__dataSetName, __dataSetDescription, __fileFolder)
 {
     // NOTE(review): the table name uses `name` before deploy(wRecord) runs - confirm name is already set
     generalRecords = new objectTable <frontierRankingAlgorithmIterationRecord>("name", name);
     deploy(wRecord);
 }
Example #15
0
 /// <summary>
 /// Called once the repository is available: loads/creates the page index table
 /// (keyed by page hash code) from its persisted path.
 /// </summary>
 public override void OnLoaded()
 {
     pageTable             = new objectTable <imbMCWebPageEntry>(pageTablePath, true, nameof(imbMCWebPageEntry.HashCode), "pageTable");
     pageTable.description = "Index datatable with entries for each stored page within this MCWebSite repository";
 }
Example #16
0
        /// <summary>Performs post-processing of data collected by the workload plugin</summary>
        /// <remarks><para>Loads all saved DataTables, groups rows in averages for each measure group and creates summary table with all experiments</para></remarks>
        /// <param name="searchPattern">pattern used to select input files</param>
        /// <param name="groupColumn">column name used for row grouping</param>
        /// <param name="overviewColumns">columns to include in overview table</param>
        /// <seealso cref="aceOperationSetExecutorBase"/>
        public void aceOperation_runWorkloadData(
            [Description("pattern used to select input files")] string searchPattern = "results*.xml",
            [Description("column name used for row grouping")] string groupColumn    = "measureGroup")
        // [Description("columns to include in overview table")] String overviewColumns = "DataLoad,CrawlerIterations,ContentPages,dlcMaximum")
        {
            aceOperation_selectFiles(searchPattern, "index\\workload", true);

            folder = folder["index\\workload"];

            // NOTE(review): `tables` (and `allTables` below) appear unused after declaration - confirm before removing
            List <DataTable> tables = new List <DataTable>();

            dataPointAggregationType aggType = dataPointAggregationType.avg;

            int ci = 1;                       // 1-based progress counter
            int c  = selectedFiles.Count();   // total number of input files

            output.log("[" + c + "] DataTable in the cue.");


            List <DataTable> allTables = new List <DataTable>();
            DataSet          dSet      = new DataSet();


            // per-crawler grouping of the aggregated tables and their rendered reports
            aceDictionarySet <string, DataTable> byCrawler = new aceDictionarySet <string, DataTable>();
            aceDictionarySet <string, DataTableForStatistics> byCrawlerRT = new aceDictionarySet <string, DataTableForStatistics>();

            DataTableForStatistics rt = null;

            foreach (FileInfo fi in selectedFiles)
            {
                try
                {
                    // load the workload entries saved by the plugin for one experiment
                    objectTable <reportPlugIn_workloadEntry> workloadEntry = new objectTable <reportPlugIn_workloadEntry>(fi.FullName, true, "EntryID", "");

                    objectTable <reportPlugIn_workloadEntry> workloadGrouped = new objectTable <reportPlugIn_workloadEntry>("EntryID", "aggregated");

                    // group rows by the measure-group column, excluding terminated runs
                    aceDictionarySet <int, reportPlugIn_workloadEntry> workloadGroups = workloadEntry.GetGroups <int>(groupColumn, "terminationWarning = 0");

                    collectionAggregationResultSet <reportPlugIn_workloadEntry> aggregateSet = new collectionAggregationResultSet <reportPlugIn_workloadEntry>();



                    // compute the average aggregate for each measure group
                    foreach (var set in workloadGroups)
                    {
                        collectionAggregationResult <reportPlugIn_workloadEntry> aggregates = null;
                        aggregates = set.Value.GetAggregates(aggType);

                        var aggregate = aggregates[aggType];
                        aggregate.measureGroup = set.Key;
                        aggregate.EntryID      = set.Key.ToString("D5") + "_" + aggType.ToString();
                        workloadGrouped.AddOrUpdate(aggregate);
                        aggregateSet.Add(aggregate.EntryID + "_" + fi.Name, aggregates);
                    }

                    string filename = (fi.Name + "_" + groupColumn + "_" + aggType.ToString()).getFilename();

                    // table name: extracted entry id + running table count, to keep names unique in the DataSet
                    string n = reportPlugIn_workload_state.ExtractEntryID(aggregateSet.lastItem.EntryID) + dSet.Tables.Count.ToString("D2");

                    DataTable dt = workloadGrouped.GetDataTable(dSet, n);
                    dt.SetDescription("Collection of [" + aggregateSet.recordType.Name + "] records grouped by [" + groupColumn + "]");
                    dt.SetAggregationAspect(dataPointAggregationAspect.subSetOfRows);
                    dt.SetAggregationOriginCount(aggregateSet.Count);
                    dt.SetAdditionalInfoEntry("Aggregation Type:", aggType);
                    dt.SetAdditionalInfoEntry("Data source file:", fi.Name);

                    dt.SetAdditionalInfoEntries("Last", aggregateSet.lastItem, typeof(string));

                    dt.SetTitle(n);

                    byCrawler.Add(aggregateSet.firstItem.Crawler, dt);

                    // dt.TableName = n;
                    //   dSet.AddTable(dt);


                    rt = dt.GetReportAndSave(folder, imbWEMManager.authorNotation, n.getFilename(), true);
                    byCrawlerRT.Add(aggregateSet.firstItem.Crawler, rt);
                    response.AppendLine("[" + ci + " / " + c + "] DataTable [" + fi.Name + "] had [" + workloadGroups.Keys.Count() + "] groups. Result saved as: " + filename);
                    ci++;
                } catch (Exception ex)
                {
                    // best-effort: a malformed input file is logged and skipped, the rest are still processed
                    output.log("[" + ci + " / " + c + "] DataTable [" + fi.FullName + "] failed.");
                    output.log(ex.Message);
                }
            }



            output.log("[" + c + "] DataTable processed.");

            // combined Excel export with all aggregated tables
            dSet.serializeDataSet("workload_all", folder, dataTableExportEnum.excel, imbWEMManager.authorNotation);

            // one Excel export per crawler, with its tables plus a rendered legend
            foreach (string key in byCrawler.Keys)
            {
                string  filename = key.getFilename();
                DataSet sd       = new DataSet(key);
                foreach (DataTable dti in byCrawler[key])
                {
                    sd.AddTable(dti.Copy());
                }

                sd.AddTable(byCrawlerRT[key].First().RenderLegend());
                sd.serializeDataSet(filename, folder, dataTableExportEnum.excel, imbWEMManager.authorNotation);
            }
        }
Example #17
0
        /// <summary>Exporting domain list according to criteria specified</summary>
        /// <remarks><para>It will select domains using existing data. If index name not specified it will scan index repository and ask user to pick one.
        /// A domain is accepted when it has more than <paramref name="minPages"/> indexed pages and more than <paramref name="minRelevant"/> relevant ones.</para></remarks>
        /// <param name="indexName">name of the index to harvest sample from - IndexID</param>
        /// <param name="minPages">required min. number of crawled/indexed pages in the doman--</param>
        /// <param name="minRelevant">required min. number of relevant pages in the index for the domain</param>
        /// <seealso cref="aceOperationSetExecutorBase"/>
        public void aceOperation_runIndexData(
            [Description("name of the index to harvest sample from - IndexID")] string indexName                = "MainIndex",
            [Description("required min. number of crawled/indexed pages in the doman--")] int minPages          = 30,
            [Description("required min. number of relevant pages in the index for the domain")] int minRelevant = 10)
        {
            if ((indexName == "*") || indexName.isNullOrEmpty())
            {
                List <string> indexList = imbWEMManager.index.GetIndexList();
                indexList.Add("*");
                // TODO(review): the chosen option is discarded - indexName is never updated with the user's answer
                aceTerminalInput.askForOption("Choose index to work with - or confirm * to load all indexes:", "*", indexList);
            }

            indexDatabaseStandalone indexDb = new indexDatabaseStandalone(indexName);



            imbWEMManager.index.OpenIndex(indexName, "plugin_dataLoader");

            // read-only: this operation only exports, it must not touch the index tables
            imbWEMManager.index.pageIndexTable.ReadOnlyMode   = true;
            imbWEMManager.index.domainIndexTable.ReadOnlyMode = true;
            List <indexDomain> d_list = new List <indexDomain>();

            List <indexPage> pages = imbWEMManager.index.pageIndexTable.GetPagesAndDomains(indexPageEvaluationEntryState.inTheIndex, out d_list);

            aceDictionarySet <indexDomain, indexPage> dict = new aceDictionarySet <indexDomain, indexPage>();
            List <string>      list    = new List <string>();
            List <indexDomain> domains = new List <indexDomain>();

            foreach (indexDomain domain in d_list)
            {
                List <indexPage> pl = Enumerable.Where(pages, x => x.url.Contains(domain.domain)).ToList();
                dict.Add(domain, pl);
                int prc = 0; // relevant-page counter for this domain
                if (pl.Count() > minPages)
                {
                    foreach (indexPage ip in pl)
                    {
                        if (ip.relevancyText == "isRelevant")
                        {
                            prc++;
                        }
                        if (prc > minRelevant)
                        {
                            output.AppendLine($" {domain.domain} P[_{pl.Count()}_] Pr[_{prc}_] --> accepted, stop counting");
                            // FIX: accepted domains were never collected (the Add was commented out),
                            // which left dTable below permanently empty
                            domains.Add(domain);
                            list.Add(domain.domain);
                            break;
                        }
                    }
                }
            }

            string sampleName = indexName.add("Pa" + minPages + "Pr" + minRelevant, "_").add("txt", ".");

            // (removed) domains.ForEach(x => list.Add(x.url)) - it was a no-op while `domains` stayed empty,
            // and with the fix above it would duplicate entries already added by domain name

            objectTable <indexDomain> dTable = new objectTable <indexDomain>("url", sampleName);

            domains.ForEach(x => dTable.AddOrUpdate(x));

            dTable.GetDataTable(null, sampleName).GetReportAndSave(folder, imbWEMManager.authorNotation, sampleName, true);

            folder = imbWEMManager.index.folder;

            string p = folder.pathFor(sampleName);

            list.saveContentOnFilePath(p);

            output.log("Exported sample saved to: " + p);
        }
Example #18
0
        /// <summary>Recalculating time by importing dt_dataLoad exported table and updating performance exports for each crawl</summary>
        /// <remarks><para>It will load the results record for opened session to find all crawls, and import all dt_dataLoad Excel tables to sum sampling periods</para></remarks>
        /// <seealso cref="aceOperationSetExecutorBase"/>
        public void aceOperation_repairRecoverTime()
        {
            var __recordKeyProperty = "TestID";
            var homeFolder          = new folderNode("index\\benchmark", "Home folder of plugin: benchmark ", "Internal data for pluting benchmark");
            var recordName          = imbWEMManager.index.experimentEntry.SessionID;

            // results table of the currently opened session - one row per crawl
            var records = new objectTable <reportPlugIn_benchmarkResults>(homeFolder.pathFor(recordName.add("results", "_")), true, __recordKeyProperty, name);

            records.description = "Summary report on the most relevant evaluation metrics.";

            // NOTE(review): the two tables below are never read or written in this method; kept
            // because objectTable construction may create/open its backing storage as a side effect.
            var record_performances = new objectTable <performanceRecord>(homeFolder.pathFor(recordName.add("performances", "_")), true, "TestID", name);

            var record_moduleImpact = new objectTable <moduleFinalOverview>(homeFolder.pathFor(recordName.add("modules", "_")), true, "ModuleName", name);

            List <reportPlugIn_benchmarkResults> allRecords = records.GetList();
            var reportFolder = imbWEMManager.index.experimentEntry.sessionReportFolder;

            // crawl name -> report data folder, so the fixed table can be saved next to each crawl's data
            Dictionary <string, folderNode> foldersForResultExcel = new Dictionary <string, folderNode>();

            foreach (reportPlugIn_benchmarkResults result in allRecords)
            {
                output.AppendLine("Crawl found: " + result.Crawler);

                // folder names drop the dashes used in crawler ids
                string pathCrawlerId = result.Crawler.Replace("-", "");

                folderNode resultNode = reportFolder[pathCrawlerId.ToUpper() + "\\crawler\\data"];

                string pathForData = resultNode.pathFor("dc_dataload_" + result.Crawler.ToLower() + ".csv");

                foldersForResultExcel.Add(result.Crawler, resultNode);

                output.AppendLine("Loading datatable: " + pathForData);

                DataTable dataTable = pathForData.deserializeDataTable(dataTableExportEnum.csv);
                output.AppendLine("DataTable loaded - rows[" + dataTable.Rows.Count + "]");

                DataColumn periodColumn = dataTable.Columns["Period"];
                if (periodColumn == null)
                {
                    // fail with a clear message instead of an opaque indexer exception below
                    throw new InvalidOperationException("Column 'Period' not found in: " + pathForData);
                }

                // sum all sampling periods (seconds) recorded for this crawl
                double periodSum = 0;
                foreach (DataRow dr in dataTable.Rows)
                {
                    // normalize decimal comma, then parse invariantly so the result does not
                    // depend on the machine's regional settings (a comma-decimal culture would
                    // otherwise misread "0.5")
                    string read  = dr[periodColumn].ToString().Replace(",", ".");
                    double readD = double.Parse(read, System.Globalization.CultureInfo.InvariantCulture);
                    periodSum += readD;
                }

                output.AppendLine("Total execution time in seconds: " + periodSum.ToString("F5"));

                // CrawlTime is stored in minutes
                result.CrawlTime = periodSum / ((double)60);
                records.AddOrUpdate(result);
            }

            // save the repaired results table into every crawl's data folder
            foreach (reportPlugIn_benchmarkResults result in allRecords)
            {
                folderNode resultFolder = foldersForResultExcel[result.Crawler];

                records.GetDataTable().GetReportAndSave(resultFolder, imbWEMManager.authorNotation, "results_timefix", true);

                output.AppendLine("Repaired result table saved to: " + resultFolder.path);
            }
        }
Example #19
0
        /// <summary>
        /// At crawl-job start: loads the side index for the current session and removes from the
        /// schedule every DLC task whose domain was already processed in an earlier session, or
        /// (optionally) whose main-index entry already holds enough loaded pages.
        /// </summary>
        /// <param name="__machine">Task machine whose queued items are filtered</param>
        /// <param name="tRecord">Test record providing the page-load limit setting</param>
        public override void eventAtInitiationOfCrawlJob(crawlerDomainTaskMachine __machine, modelSpiderTestRecord tRecord)
        {
            imbWEMManager.index.domainIndexTable.recheck(imbWEMManager.index.pageIndexTable, output);

            string path = imbWEMManager.index.experimentEntry.sessionReportFolder.pathFor(recordFileName);

            records             = new objectTable <indexDomain>(path, true, __recordKeyProperty, name);
            records.description = "Side index";

            // urls already present in the side index; entries use the wRecord.instanceID
            // format (http://www. ...) - HashSet gives O(1) membership instead of a list scan
            var domains = records.GetList();
            HashSet <string> knownUrls = new HashSet <string>();
            domains.ForEach(x => knownUrls.Add(x.url));

            int dc_ik = 0;  // tasks dropped because the index already has enough pages loaded
            List <crawlerDomainTask> tasks = new List <crawlerDomainTask>();  // tasks flagged for removal

            foreach (var task in __machine.items.items)
            {
                if (knownUrls.Contains(task.wRecord.instanceID))
                {
                    // domain was already processed in an earlier session -> abort the task
                    task.status = crawlerDomainTaskStatusEnum.aborted;
                    tasks.Add(task);
                }
                else if (imbWEMManager.settings.supportEngine.reportPlugIn_sideIndexer_UseIfPagesKnown)
                {
                    indexDomain iDomainFromIndex = imbWEMManager.index.domainIndexTable.GetOrCreate(task.wRecord.instanceID);

                    records.AddOrUpdate(iDomainFromIndex, objectTableUpdatePolicy.updateIfHigher);

                    // BUGFIX(review): the original re-checked the side-index snapshot here, which by
                    // construction cannot contain this url (the branch above would have caught it),
                    // so this cutoff never fired. Checking the main-index entry matches the
                    // "index has already enough pages loaded" log message below.
                    if ((iDomainFromIndex.relevantPages + iDomainFromIndex.notRelevantPages) >= tRecord.instance.settings.limitTotalPageLoad)
                    {
                        dc_ik++;
                        tasks.Add(task);
                    }
                }
            }

            // NOTE(review): TryDequeue removes items from the head of the queue, count-matched to
            // the flagged tasks rather than removing those exact instances - confirm the queue's
            // ordering guarantees this drains the intended DLC tasks.
            int dc = 0;

            foreach (var task in tasks)
            {
                crawlerDomainTask t_out = null;
                if (__machine.items.items.TryDequeue(out t_out))
                {
                    dc++;
                }
            }

            aceLog.consoleControl.setAsOutput(output, "SideIndex");
            if (dc > 0)
            {
                output.log("DLCs processed in an earlier session: " + dc);
            }
            if (dc_ik > 0)
            {
                output.log("DLCs removed from schedule because the index has already enough pages loaded: " + dc_ik);
            }
        }