/// <summary>
/// Reduces the dataset category: removes the web sites whose web graph is missing or carries any of the
/// diagnostic marks selected in <paramref name="settings"/>, then calls <c>RemoveEmptyDocuments</c> with the
/// configured page limits and reports how much of the original document count remained.
/// </summary>
/// <param name="dataSet">The data set to reduce. It is modified in place.</param>
/// <param name="settings">
/// The reduction settings: <c>marksToRemove</c> selects the diagnostic marks that disqualify a site
/// (<c>none</c> disables the site-level filtering), <c>LimitSettings</c> supplies the page limits, and
/// <c>logSiteLevel</c> / <c>logCategoryLevel</c> control what is written to the log.
/// </param>
/// <param name="logger">The logger that receives the site-level and category-level messages.</param>
/// <returns>
/// The value of <c>GetRatio</c> for the total document count after the reduction against the total
/// document count before it (logged as a percentage when category-level logging is enabled).
/// </returns>
public double ReduceDatasetCategory(WebSiteDocumentsSet dataSet, WebSiteDataSetReductionSettings settings, ILogBuilder logger)
{
    Int32 total_input = dataSet.CountDocumentsTotal();

    // Site-level filtering is active only when at least one diagnostic mark is selected.
    if (settings.marksToRemove != WebSiteGraphDiagnosticMark.none)
    {
        List<WebSiteGraphDiagnosticMark> marks = settings.marksToRemove.getEnumListFromFlags<WebSiteGraphDiagnosticMark>();

        // Removal is deferred: the data set must not be modified while it is being enumerated.
        List<WebSiteDocuments> toRemove = new List<WebSiteDocuments>();

        foreach (WebSiteDocuments site in dataSet)
        {
            if (site.extensions.graph == null)
            {
                if (settings.logSiteLevel)
                {
                    logger.log("Site _ [" + site.domain + "] _ flaged for removal because not having graph declared");
                }

                // A site without a graph cannot be diagnosed; queue it for removal, as the log message states.
                toRemove.Add(site);
                continue;
            }

            // Every matching mark is logged, but the site is queued for removal only once.
            Boolean flagged = false;

            foreach (WebSiteGraphDiagnosticMark mark in marks)
            {
                if (site.extensions.graph.diagnosticResults.HasFlag(mark))
                {
                    if (settings.logSiteLevel)
                    {
                        logger.log("Site _ [" + site.domain + "] _ flaged for removal because of [" + mark.ToString() + "] web graph diagnostic mark");
                    }

                    flagged = true;
                }
            }

            if (flagged)
            {
                toRemove.Add(site);
            }
        }

        // Each queued entry was enumerated from the data set exactly once, so no Contains() pre-check is needed.
        foreach (WebSiteDocuments site in toRemove)
        {
            dataSet.Remove(site);
        }
    }

    dataSet.RemoveEmptyDocuments(logger, settings.LimitSettings.minPageLimit, settings.LimitSettings.maxPageLimit);

    Int32 total_output = dataSet.CountDocumentsTotal();
    Double average = total_output.GetRatio(total_input);

    if (settings.logCategoryLevel)
    {
        logger.log("Document count in _ [" + dataSet.name + "] _ reduced to: " + average.ToString("P2"));
    }

    return average;
}