コード例 #1
0
        /// <summary>
        /// Reduces the dataset category: removes web sites selected by the configured web graph diagnostic marks,
        /// then calls <c>RemoveEmptyDocuments</c> on the data set with the configured page limits.
        /// </summary>
        /// <remarks>
        /// Site-level filtering runs only when <c>settings.marksToRemove</c> is not <c>none</c>. In that case a site
        /// is removed when it has no graph declared, or when its graph diagnostic results carry at least one of the
        /// marks to remove. The data set is modified in place.
        /// </remarks>
        /// <param name="dataSet">The data set to reduce; sites are removed from this instance.</param>
        /// <param name="settings">The reduction settings: marks to remove, page limits and logging switches.</param>
        /// <param name="logger">The logger that receives site-level and category-level messages.</param>
        /// <returns>
        /// The value of <c>GetRatio</c> applied to the total document count after reduction and the total document
        /// count before reduction (presumably output / input; confirm against the <c>GetRatio</c> extension).
        /// </returns>
        public double ReduceDatasetCategory(WebSiteDocumentsSet dataSet, WebSiteDataSetReductionSettings settings, ILogBuilder logger)
        {
            Int32 total_input = dataSet.CountDocumentsTotal();

            List<WebSiteDocuments> toRemove = new List<WebSiteDocuments>();

            // The mark filter is loop-invariant, so it is evaluated once instead of once per site.
            if (settings.marksToRemove != WebSiteGraphDiagnosticMark.none)
            {
                List<WebSiteGraphDiagnosticMark> marks = settings.marksToRemove.getEnumListFromFlags<WebSiteGraphDiagnosticMark>();

                foreach (WebSiteDocuments site in dataSet)
                {
                    if (site.extensions.graph == null)
                    {
                        if (settings.logSiteLevel)
                        {
                            logger.log("Site _ [" + site.domain + "] _ flaged for removal because not having graph declared");
                        }

                        // The message above announces removal, so the site has to actually be queued for it.
                        toRemove.Add(site);
                        continue;
                    }

                    Boolean hasMarkToRemove = false;

                    foreach (WebSiteGraphDiagnosticMark mark in marks)
                    {
                        if (!site.extensions.graph.diagnosticResults.HasFlag(mark))
                        {
                            continue;
                        }

                        // Every matching mark is still reported, but the site is queued only once (below).
                        if (settings.logSiteLevel)
                        {
                            logger.log("Site _ [" + site.domain + "] _ flaged for removal because of [" + mark.ToString() + "] web graph diagnostic mark");
                        }

                        hasMarkToRemove = true;
                    }

                    if (hasMarkToRemove)
                    {
                        toRemove.Add(site);
                    }
                }
            }

            // Removal is deferred to this point because the data set cannot be modified while it is enumerated.
            // Each site was queued once per enumeration step, so no Contains() pre-check is needed.
            foreach (WebSiteDocuments site in toRemove)
            {
                dataSet.Remove(site);
            }

            dataSet.RemoveEmptyDocuments(logger, settings.LimitSettings.minPageLimit, settings.LimitSettings.maxPageLimit);

            Int32 total_output = dataSet.CountDocumentsTotal();

            Double retainedRatio = total_output.GetRatio(total_input);

            if (settings.logCategoryLevel)
            {
                logger.log("Document count in _ [" + dataSet.name + "] _ reduced to: " + retainedRatio.ToString("P2"));
            }

            return retainedRatio;
        }